





S.isalnum() -> bool


实例:
>>> string = "Special $#! characters spaces 888323"
>>> ''.join(e for e in string if e.isalnum())
'Specialcharactersspaces888323'



# string.punctuation
#
# Several ways to strip punctuation from a string, ported to Python 3.
# The Python 2-only spellings from the original notes are kept as comments.
import re
import string
import sys
import unicodedata
from unicodedata import category

s = "string. With. Punctuation?"  # Sample string

# Method 1 (Python 2 only):
#     out = s.translate(string.maketrans("", ""), string.punctuation)
# Method 2 (Python 2 only):
#     out = s.translate(None, string.punctuation)
# Python 3 spelling of methods 1 and 2: the third argument of str.maketrans
# lists the characters that translate() should delete.
out = s.translate(str.maketrans("", "", string.punctuation))

# Method 3:
exclude = set(string.punctuation)
out = ''.join(ch for ch in s if ch not in exclude)

# Method 4 (note: this rebinds s itself):
for c in string.punctuation:
    s = s.replace(c, "")
# s is now 'string With Punctuation'

# Method 5:
# re.escape escapes every character in the string that could otherwise be
# interpreted as a regular-expression operator.
out = re.sub('[%s]' % re.escape(string.punctuation), '', s)

# Method 6:
# string.punctuation only covers ASCII; a broader (but slower) approach is
# to use the unicodedata module.
s = 'String — with - «Punctuation »...'
out = re.sub('[%s]' % re.escape(string.punctuation), '', s)
print('Stripped', out)
# Output: Stripped String — with  «Punctuation »
out = ''.join(ch for ch in s if category(ch)[0] != 'P')
print('Stripped', out)
# Output: Stripped String  with  Punctuation

# For Python 3 str or Python 2 unicode values, str.translate() only takes a
# dictionary; codepoints (integers) are looked up in that mapping and anything
# mapped to None is removed.
# To remove (some?) punctuation then, use:
remove_punct_map = dict.fromkeys(map(ord, string.punctuation))
out = s.translate(remove_punct_map)

# Your method doesn't work in Python 3, as the translate method doesn't accept
# the second argument any more.
# Map every code point whose Unicode category starts with 'P' to None, so that
# translate() deletes it.
tbl = dict.fromkeys(
    i for i in range(sys.maxunicode)
    if unicodedata.category(chr(i)).startswith('P')
)


def remove_punctuation(text):
    """Return *text* with every Unicode punctuation character removed.

    Characters are dropped when their code point is a key of ``tbl``, i.e.
    when ``unicodedata.category`` reports a category starting with 'P'.
    """
    return text.translate(tbl)



# Example: strip everything that is neither a word character nor whitespace.
import re

s = "string. With. Punctuation?"
# [^\w\s] matches any single character that is not a word character and not
# whitespace; each match is replaced with the empty string.
s = re.sub(r'[^\w\s]', '', s)

# Benchmark: compare four ways of stripping ASCII punctuation from a string.
import re
import string
import timeit

s = "string. With. Punctuation"
exclude = set(string.punctuation)
# The Python 2 original used table = string.maketrans("", "") together with
# s.translate(table, string.punctuation). In Python 3 the characters to delete
# are passed as the third argument of str.maketrans instead.
table = str.maketrans("", "", string.punctuation)
regex = re.compile('[%s]' % re.escape(string.punctuation))


def test_set(s):
    """Drop punctuation by filtering characters against the ``exclude`` set."""
    return ''.join(ch for ch in s if ch not in exclude)


def test_re(s):
    """Drop punctuation with the precompiled ``regex`` character class."""
    return regex.sub('', s)


def test_trans(s):
    """Drop punctuation with ``str.translate`` and the prebuilt ``table``."""
    return s.translate(table)


def test_repl(s):
    """Drop punctuation with one ``str.replace`` call per punctuation mark."""
    for c in string.punctuation:
        s = s.replace(c, "")
    return s


# These are benchmark candidates, not unit tests: keep pytest from collecting
# them just because their names start with "test_".
for _candidate in (test_set, test_re, test_trans, test_repl):
    _candidate.__test__ = False
del _candidate


def main():
    """Time each candidate over 1,000,000 calls and print the results."""
    candidates = (
        ("sets", test_set),
        ("regex", test_re),
        ("translate", test_trans),
        ("replace", test_repl),
    )
    for label, func in candidates:
        # Pass the callable and sample through globals= rather than importing
        # them from __main__ inside the setup string.
        timer = timeit.Timer('f(s)', globals={'f': func, 's': s})
        print("%s :" % label, timer.timeit(1000000))


if __name__ == "__main__":
    main()

# out_put (as recorded in the original notes, alongside the Python 2 code):
# sets : 19.8566138744
# regex : 6.86155414581
# translate : 2.12455511093
# replace : 28.4436721802



