

In [1]: line = 'asdf fjdk; afed, fjek,asdf,    foo'
In [2]: import re
In [3]: re.split(r'[;,\s]\s*',line)
Out[3]: ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']
In [4]: re.split(r'(;|,|\s)\s*',line)
Out[4]: ['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']
In [5]: re.split(r'(?:,|;|\s)\s*',line)
Out[5]: ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']


In [6]: url = 'https://www.baidu.com'
In [7]: url.endswith('.com')
Out[7]: True
In [8]: url.endswith('.cn')
Out[8]: False
In [10]: url.startswith('https:')
Out[10]: True
In [11]: url.startswith('http:')
Out[11]: False
In [1]: import os
In [2]: filenames = os.listdir('.')
In [3]: filenames
In [4]: [i for i in filenames if i.startswith('.')]
In [5]: [i for i in filenames if i.endswith(('.py','.gz','.tgz'))]
Out[5]: ['Python-3.7.0.tgz', 'mysql-boost-8.0.12.tar.gz', 'heapq_queue.py']
In [6]: any(i.endswith('.py') for i in filenames)
Out[6]: True
from urllib.request import urlopen


def read_date(name):
    """Return the text content of *name*, which is a URL or a local path.

    Names starting with ``http:``, ``https:`` or ``ftp:`` are fetched with
    ``urlopen`` and the response bytes are decoded with ``bytes.decode()``
    defaults.  Any other name is opened as a local text file.
    """
    # str.startswith accepts a tuple, so one call covers every scheme.
    if name.startswith(('http:', 'https:', 'ftp:')):
        # Use the response as a context manager so the connection is
        # closed even if read() or decode() raises.
        with urlopen(name) as response:
            return response.read().decode()
    with open(name) as f:
        return f.read()


result = read_date('test.txt')
In [9]: import re
In [10]: re.match('http:|https:|ftp:','https://www.baidu.com')
Out[10]: <re.Match object; span=(0, 6), match='https:'>


In [12]: from fnmatch import fnmatch,fnmatchcase
In [13]: fnmatch('foo.txt','*.txt')
Out[13]: True
In [14]: fnmatch('foo.txt','?oo.txt')
Out[14]: True
In [15]: names = ['dat01.csv','dat99.csv','config.ini','foo.py']
In [18]: [i for i in names if fnmatch(i,'dat[0-9]*.csv')]
Out[18]: ['dat01.csv', 'dat99.csv']
In [2]: fnmatch('foo.txt','*.TXT')
Out[2]: True
#fnmatch() 的大小写敏感性取决于操作系统: Windows 下为 True, Linux/macOS 下为 False
In [3]: fnmatchcase('foo.txt','*.TXT')
Out[3]: False
#推导式过滤字符串
In [20]: addresses = [
    ...:     '5412 N CLARK ST',
    ...:     '1060 W ADDISON ST',
    ...:     '1039 W GRANVILLE AVE',
    ...:     '2122 N CLARK ST',
    ...:     '4802 N BROADWAY',
    ...: ]
In [21]: [i for i in addresses if fnmatch(i,'*ST')]
Out[21]: ['5412 N CLARK ST', '1060 W ADDISON ST', '2122 N CLARK ST']
In [22]: [i for i in addresses if fnmatch(i,'*CLARK*')]
Out[22]: ['5412 N CLARK ST', '2122 N CLARK ST']



In [23]: text1 = 'today is 10/19/2018.Pycon starts 3/13/2019.'
In [24]: import re
In [25]: re.match(r'\d+/\d+/\d+',text1)
In [28]: re.findall(r'\d+/\d+/\d+',text1)
Out[28]: ['10/19/2018', '3/13/2019']
In [29]: text2 = '11/20/2018'
In [30]: re.match(r'\d+/\d+/\d+',text2)
Out[30]: <re.Match object; span=(0, 10), match='11/20/2018'>
In [31]: result = re.match(r'(\d+)/(\d+)/(\d+)',text2)
In [32]: result.groups()
Out[32]: ('11', '20', '2018')
In [33]: result.group(0)
Out[33]: '11/20/2018'
In [34]: result.group(1)
Out[34]: '11'
In [35]: result.group(2)
Out[35]: '20'
In [36]: result.group(3)
Out[36]: '2018'
In [39]: text1 = 'today is 10/19/2018.Pycon starts 3/13/2019.'
In [40]: for month,day,year in re.findall(r'(\d+)/(\d+)/(\d+)',text1):
    ...:     print('{}-{}-{}'.format(year,month,day))
    ...:
2018-10-19
2019-3-13
In [43]: text1 = 'today is 10/19/2018.Pycon starts 3/13/2019.'
In [44]: for i in re.finditer(r'(\d+)/(\d+)/(\d+)',text1):
    ...:     print(i.groups())
    ...:
('10', '19', '2018')
('3', '13', '2019')



In [45]: text = 'abcabcabcabc'
In [46]: text.replace('a','ee')
Out[46]: 'eebceebceebceebc'


In [47]: text3 = 'today is 10/19/2018. pycon starts 3/13/2013.'
In [49]: re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',text3)
Out[49]: 'today is 2018-10-19. pycon starts 2013-3-13.'


In [54]: text3 = 'today is 10/19/2018. pycon starts 3/13/2013.'
In [55]: from calendar import month_abbr
In [56]: def change_date(m):
    ...:     mon_name = month_abbr[int(m.group(1))]
    ...:     return '{} {} {}'.format(m.group(2),mon_name,m.group(3))
    ...:
In [57]: re.sub(r'(\d+)/(\d+)/(\d+)',change_date,text3)
Out[57]: 'today is 19 Oct 2018. pycon starts 13 Mar 2013.'
In [58]: re.subn(r'(\d+)/(\d+)/(\d+)',change_date,text3)
Out[58]: ('today is 19 Oct 2018. pycon starts 13 Mar 2013.', 2)



In [60]: text = 'UPPER PYTHON,lower python, mixed Python'
In [61]: re.findall('python',text,flags=re.IGNORECASE)
Out[61]: ['PYTHON', 'python', 'Python']
import re
def matchcase(word):
    """Return a ``re.sub`` callback that inserts *word* in the matched text's case.

    The callback looks at the text that was matched and returns *word*:

    * upper-cased when the match is all upper case,
    * lower-cased when the match is all lower case,
    * capitalized when the match starts with an upper-case letter,
    * unchanged otherwise.

    Example::

        re.sub('python', matchcase('snake'),
               'UPPER PYTHON,lower python,Mixed Python',
               flags=re.IGNORECASE)
        # -> 'UPPER SNAKE,lower snake,Mixed Snake'
    """
    def replace(m):
        text = m.group()
        if text.isupper():
            return word.upper()
        if text.islower():
            return word.lower()
        # Slice rather than index so an empty match (possible with
        # patterns such as 'x*') does not raise IndexError.
        if text[:1].isupper():
            return word.capitalize()
        return word
    return replace
text = 'UPPER PYTHON,lower python,Mixed Python'


str_pat = re.compile(r'\"(.*)\"')
text1 = 'computer says "no."'
str_pat.findall(text1)
Out[18]: ['no.']
text2 = 'computer says "no." phone says "yes."'
str_pat.findall(text2) #在使用.*贪婪匹配时它将匹配尽可能多的匹配项
Out[20]: ['no." phone says "yes.']
str_pat = re.compile(r'\"(.*?)\"')  #只需要在多匹配后加上?号,就会以最少的匹配模式进行匹配
str_pat.findall(text2)
Out[22]: ['no.', 'yes.']


comment = re.compile(r'python(.*?)end')
text1 = 'python is ver good \n so so end'
comment.findall(text1)  #.*匹配不到换行符
Out[27]: []
comment = re.compile(r'python(.*?)end',flags=re.DOTALL) #加上标记re.DOTALL将匹配所有的字符包括换行符
comment.findall(text1)
Out[29]: [' is ver good \n so so ']
comment = re.compile(r'python((?:.|\n)*?)end') #(?:.|\n)会指定一个非捕获组,它只做匹配但不捕获结果,也不分配组号
comment.findall(text1)
Out[31]: [' is ver good \n so so ']


s1 = 'spicy\u00f1o'  #它使用的是预组合字符 ñ (U+00F1), 即全组成的(fully composed)形式
s2 = 'spicyn\u0303o' #它使用的是拉丁字母 n 加上组合字符 ~ (U+0303) 组合而成
s1 == s2   #两者显示相同,但码位序列不同,所以字符比较是不相等的
Out[35]: False
s1
Out[36]: 'spicyño'
s2
Out[37]: 'spicyño'


In [21]: s = ' hello world \n'
In [22]: s.strip()
Out[22]: 'hello world'
In [23]: s.lstrip()
Out[23]: 'hello world \n'
In [24]: s.rstrip()
Out[24]: ' hello world'
In [25]: t = '-----hello====='
In [26]: t.lstrip('-')             #指定去除字符
Out[26]: 'hello====='
In [27]: t.strip('-=')       #可以指定多个字符
Out[27]: 'hello'
#使用上面的方法不能去除中间的字符,要去除中间的字符可以使用replace()方法或正则表达式替换
In [28]: s.replace(' ','')
Out[28]: 'helloworld\n'
In [29]: re.sub(r'\s+', '',s)
Out[29]: 'helloworld'


In [31]: text = 'hello world'
In [32]: text.ljust(30)
Out[32]: 'hello world                   '
In [33]: text.rjust(30)
Out[33]: '                   hello world'
In [34]: text.center(30)
Out[34]: '         hello world          '
In [35]: text.center(30,'=')
Out[35]: '=========hello world=========='
In [36]: format(text,'>20')
Out[36]: '         hello world'
In [37]: format(text,'<20')
Out[37]: 'hello world         '
In [38]: format(text,'^20')
Out[38]: '    hello world     '
In [39]: format(text,'=^20')
Out[39]: '====hello world====='
In [40]: format(text,'=^20s')
Out[40]: '====hello world====='
In [41]: format(text,'*^20s')
Out[41]: '****hello world*****'
In [42]: '{:>10s}{:<10s}'.format('hello','world')
Out[42]: '     helloworld     '
In [43]: '{:#>10s} {:&<10s}'.format('hello','world')
Out[43]: '#####hello world&&&&&'


In [44]: data = ['I','like','is','python']
In [45]: ' '.join(data)
Out[45]: 'I like is python'
In [46]: ','.join(data)
Out[46]: 'I,like,is,python'
#利用生成器表达式转换后连接字符串会更高效
In [47]: ','.join(str(d) for d in data)
Out[47]: 'I,like,is,python'


In [5]: str_variable = "{name} today {num} old year"
In [6]: str_variable.format(name='zhang',num=20)
Out[6]: 'zhang today 20 old year'
#另一种方式是使用format_map()和vars()联合匹配当前环境中的变量名
In [7]: name = 'python'
In [8]: num = 18
In [9]: str_variable.format_map(vars())
Out[9]: 'python today 18 old year'
In [10]: class info:
    ...:     def __init__(self,name,num):
    ...:         self.name = name
    ...:         self.num = num
    ...:
In [11]: a = info('shell',23)
In [12]: str_variable.format_map(vars(a))
Out[12]: 'shell today 23 old year'
In [13]: class safesub(dict):
    ...:     def __missing__(self,key):
    ...:         return '{' + key + '}'
    ...:
In [14]: del num
In [15]: str_variable.format_map(safesub(vars()))
Out[15]: 'python today {num} old year'


>>> import textwrap
>>> s = "look into eyes, look into my eyes, the eyes,the eyes, \
... the eyes, not around the eyes, don't look around the eyes, \
... look into my eyes, you're under."
>>> print(textwrap.fill(s,70)
... )
look into eyes, look into my eyes, the eyes,the eyes, the eyes, not
around the eyes, don't look around the eyes, look into my eyes, you're
under.
>>> print(textwrap.fill(s,40))
look into eyes, look into my eyes, the
eyes,the eyes, the eyes, not around the
eyes, don't look around the eyes, look
into my eyes, you're under.
>>> print(textwrap.fill(s,40,initial_indent=' '))
 look into eyes, look into my eyes, the
eyes,the eyes, the eyes, not around the
eyes, don't look around the eyes, look
into my eyes, you're under.
>>> print(textwrap.fill(s,40,subsequent_indent=' '))
look into eyes, look into my eyes, the
 eyes,the eyes, the eyes, not around the
 eyes, don't look around the eyes, look
 into my eyes, you're under.
>>> import os
>>> print(textwrap.fill(s,os.get_terminal_size().columns))
look into eyes, look into my eyes, the eyes,the eyes, the eyes, not around the eyes, don't look around
the eyes, look into my eyes, you're under.
>>> print(os.get_terminal_size())
os.terminal_size(columns=105, lines=32)


In [1]: s = 'Elements are written aa "<tag>text</tag>".'
In [2]: import html
In [3]: s
Out[3]: 'Elements are written aa "<tag>text</tag>".'
In [4]: html.escape(s)
Out[4]: 'Elements are written aa &quot;&lt;tag&gt;text&lt;/tag&gt;&quot;.'
In [5]: html.escape(s,quote=False)
Out[5]: 'Elements are written aa "&lt;tag&gt;text&lt;/tag&gt;".'
In [6]: s1 = 'Spicy &quot;Jalape&#241;o&quot.'
#HTMLParser().unescape() 已在 Python 3.9 中移除,应改用 html.unescape()
In [11]: html.unescape(s1)
Out[11]: 'Spicy "Jalapeño".'
In [12]: s2 = html.unescape(s1)
In [13]: s2.encode('ascii',errors='xmlcharrefreplace')
Out[13]: b'Spicy "Jalape&#241;o".'
In [14]: s3 = 'the prompt is &gt;&gt;&gt;'
In [15]: from xml.sax.saxutils import unescape
In [16]: unescape(s3)
Out[16]: 'the prompt is >>>'


#从左到右将字符串解析为标记流(stream of tokens)
In [17]: text = 'foo = 23 + 42 * 10'
In [18]: tokens= [('NAME','foo'),('EQ','='),('NUM','23'),('PLUS','+'),('NUM','42'),('TIMES','*'),('NUM','10')]
In [19]: import re
In [20]: NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
In [21]: NUM = r'(?P<NUM>\d+)'
In [22]: PLUS = r'(?P<PLUS>\+)'
In [23]: TIMES = r'(?P<TIMES>\*)'
In [24]: EQ = r'(?P<EQ>=)'
In [25]: WS = r'(?P<WS>\s+)'
In [26]: master_pat = re.compile('|'.join([NAME,NUM,PLUS,TIMES,EQ,WS]))
In [27]: scanner = master_pat.scanner('foo = 42')
In [28]: scanner.match()
Out[28]: <re.Match object; span=(0, 3), match='foo'>
In [29]: _.lastgroup,_.group()
Out[29]: ('NAME', 'foo')
In [30]: scanner.match()
Out[30]: <re.Match object; span=(3, 4), match=' '>
In [31]: _.lastgroup,_.group()
Out[31]: ('WS', ' ')
In [32]: scanner.match()
Out[32]: <re.Match object; span=(4, 5), match='='>
In [33]: _.lastgroup,_.group()
Out[33]: ('EQ', '=')
In [34]: scanner.match()
Out[34]: <re.Match object; span=(5, 6), match=' '>
In [35]: _.lastgroup,_.group()
Out[35]: ('WS', ' ')
In [36]: scanner.match()
Out[36]: <re.Match object; span=(6, 8), match='42'>
In [37]: _.lastgroup,_.group()
Out[37]: ('NUM', '42')
In [40]: from collections import namedtuple
In [41]: token = namedtuple('token',['type','value'])
In [42]: def generate_tokens(pat,text):
    ...:     scanner = pat.scanner(text)
    ...:     for m in iter(scanner.match,None):
    ...:         yield token(m.lastgroup,m.group())
    ...:
In [43]: for tok in generate_tokens(master_pat,'foo = 42'):
    ...:     print(tok)
    ...:
token(type='NAME', value='foo')
token(type='WS', value=' ')
token(type='EQ', value='=')
token(type='WS', value=' ')
token(type='NUM', value='42')
In [45]: tokens = (tok for tok in generate_tokens(master_pat,text) if tok.type != 'WS')
In [46]: for tok in tokens:print(tok)
token(type='NAME', value='foo')
token(type='EQ', value='=')
token(type='NUM', value='23')
token(type='PLUS', value='+')
token(type='NUM', value='42')
token(type='TIMES', value='*')
token(type='NUM', value='10')


import re
import collections

# Token patterns: one named group per token type, so that
# ``match.lastgroup`` yields the token's type name.
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'

master_pat = re.compile(
    '|'.join([NUM, PLUS, MINUS, TIMES, DIVIDE, LPAREN, RPAREN, WS])
)
Token = collections.namedtuple('Token', ['type', 'value'])


def generate_tokens(text):
    """Yield a ``Token(type, value)`` for each non-whitespace token in *text*.

    Scanning stops at the first position where none of the token patterns
    matches, because ``scanner.match()`` returns ``None`` there.
    """
    scanner = master_pat.scanner(text)
    # Two-argument iter(): call scanner.match() until it returns None.
    for m in iter(scanner.match, None):
        tok = Token(m.lastgroup, m.group())
        if tok.type != 'WS':
            yield tok


class ExpressionEvaluator:
    """Recursive-descent evaluator for integer arithmetic expressions.

    Grammar implemented by the methods below::

        expr   ::= term { ('+' | '-') term }*
        term   ::= factor { ('*' | '/') factor }*
        factor ::= NUM | '(' expr ')'

    ``self.tok`` holds the most recently consumed token and
    ``self.nexttok`` the one-token lookahead.
    """

    def parse(self, text):
        """Tokenize *text* and return the value of the expression."""
        self.tokens = generate_tokens(text)
        self.nexttok = None
        self.tok = None
        self._advance()  # load the first lookahead token
        return self.expr()

    def _advance(self):
        """Shift the lookahead into ``tok`` and read the next token."""
        self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

    def _accept(self, toktype):
        """Consume the lookahead if it has type *toktype*; report success."""
        if self.nexttok and self.nexttok.type == toktype:
            self._advance()
            return True
        return False

    def _expect(self, toktype):
        """Consume a token of type *toktype* or raise ``SyntaxError``."""
        if not self._accept(toktype):
            raise SyntaxError('Expected ' + toktype)

    def expr(self):
        """expr ::= term { ('+' | '-') term }*"""
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type
            right = self.term()
            if op == 'PLUS':
                exprval += right
            elif op == 'MINUS':
                exprval -= right
        return exprval

    def term(self):
        """term ::= factor { ('*' | '/') factor }*"""
        termval = self.factor()
        while self._accept('TIMES') or self._accept('DIVIDE'):
            op = self.tok.type
            right = self.factor()
            if op == 'TIMES':
                termval *= right
            elif op == 'DIVIDE':
                # True division: the result becomes a float.
                termval /= right
        return termval

    def factor(self):
        """factor ::= NUM | '(' expr ')'"""
        if self._accept('NUM'):
            return int(self.tok.value)
        if self._accept('LPAREN'):
            exprval = self.expr()
            self._expect('RPAREN')
            return exprval
        raise SyntaxError('Expected NUMBER or LPAREN')


if __name__ == '__main__':
    e = ExpressionEvaluator()
    print(e.parse('2'))
    print(e.parse('2 + 3'))
    print(e.parse('2 + 3 * 4'))
    print(e.parse('2 + (3 + 4) * 5'))


In [2]: data = b'hello world'
In [3]: data
Out[3]: b'hello world'
In [4]: data[0:5]
Out[4]: b'hello'
In [6]: data.split()
Out[6]: [b'hello', b'world']
In [7]: data.replace(b'hello',b'python')
Out[7]: b'python world'
In [8]: data[0]
Out[8]: 104


  1. python 字符串 数字_Python基础教程:数字、字符串

    Python 数字 Python 数字数据类型用于存储数值. 数据类型是不允许改变的,这就意味着如果改变数字数据类型得值,将重新分配内存空间. 以下实例在变量赋值时数字对象将被创建: var1 = 1 ...

  2. python字符串与文本处理技巧(3):字符剔除、字符对齐、字符拼接、字符插入变量

    1. 删除字符串中不需要的字符 去掉文本字符串开头,结尾或者中间不想要的字符,比如空白. strip() & lstrip() & rstrip() strip() 方法能用于删除开始 ...

  3. python字符串与文本处理技巧(2):大小写敏感搜索、最短匹配、多行匹配、Unicode标准化

    1. 字符串忽略大小写的搜索替换 re.findall(patter, string, flags=re.IGNORECASE) 当我们需要忽略字符串中的字母大小写进行模式搜索时可以采用如下方案: i ...

  4. python字符串与文本处理技巧(1):分割、首尾匹配、模式搜索、匹配替换

    1. 字符串分割 将一个字符串分割为多个字段,但是分隔符(还有周围的空格)并不是固定的. str.split() 和 re.split() string 对象的 split() 方法只适应于非常简单的 ...

  5. python 字符串替换_Python基础教程,第四讲,字符串详解

    本节课主要和大家一起学习一下Python中的字符串操作,对字符串的操作在开发工作中的使用频率比较高,所以单独作为一课来讲. 学完此次课程,我能做什么? 学完本次课程后,我们将学会如何创建字符串,以及如 ...

  6. python字符串与文本处理技巧(4): 格式化输出、令牌解析、串上串

    1. 以指定列宽格式化字符串 很多情况下,我们有一些长字符串,想以指定的列宽将它们重新格式化. textwrap() import textwrap import os s = "Look i ...

  7. python字符串模糊匹配_NLP教程:用Fuzzywuzzy进行字符串模糊匹配

    在计算机科学中,字符串模糊匹配( fuzzy string matching)是一种近似地(而不是精确地)查找与模式匹配的字符串的技术.换句话说,字符串模糊匹配是一种搜索,即使用户拼错单词或只输入部分 ...

  8. python islower函数_python字符串是否是小写-python 字符串小写-python islower函数-python islower函数未定义-嗨客网...

    Python字符串是否是小写教程 在开发过程中,有时候我们需要判断一个 Python islower()函数详解 语法 str.islower() -> bool 参数 参数 描述 str 表示 ...

  9. python 字符串首字母,Python 字符串首字母大写-Python设置字符串首字母大写-python title()作用-python title函数-嗨客网...

    Python字符串首字母大写 Python字符串首字母大写教程 在开发过程中,很多时候我们需要将一个 Python title()函数详解 语法 S.title() -> str 参数 参数 描 ...


  1. snmpd 子代理模式编译测试
  2. YOLO窥见黑夜|YOLO in the Dark让黑夜里的目标检测成为可能
  3. 利用ASP.NET2.0向导控件一步步建立与用户的交互--------提高和自定义用户体验
  4. 11尺寸长宽 iphone_LED显示屏的尺寸规格计算方法
  5. 【Python3网络爬虫开发实战】1.4.1-MySQL的安装
  6. 3D引擎多线程:渲染与逻辑分离
  7. 大数据 Spark 架构
  8. 【转】每天一个linux命令(11):nl命令
  9. 织梦DEDE一键搬迁网站模板数据到DSCMS教程
  10. javaw java_我可以找出java程序是使用java还是javaw启动的
  11. 微信公众号工作中如何产生新媒体思维
  12. sql 数据库练习, 学生表,成绩表
  13. Power BI时间智能
  14. 远程主机和本地文件互传的2种方法
  15. WPF入门教程系列(4)
  16. 高德地图绘制点(简单实现)
  17. Python学习笔记(4)~Python基础练习之常用内置函数(1-10)
  18. CSS中的长度单位和HTML5中多媒体标签的使用
  19. Chapter 3 (Determinants): Cramer‘s rule, volume, and linear transformations (克拉默法则、体积和线性变换)
  20. Python编程之求数列20项和


  1. PV操作经典例题——银行业务办理问题
  2. 使用JDBC连接MySQL数据库
  3. mana spark有中文吗_玛娜火花Mana Spark单机版下载-玛娜火花Mana Spark游戏下载-k73游戏之家...
  4. CocosCreator Effect (Shader) - 斜条纹如何画
  5. 求助!spyder beautifulsoup4显示错误:AttributeError: 'HTMLParserTreeBuilder' object has no attribute 'initia
  6. hdu 1232 并查集
  7. 8个iPhone防盗秘籍 为手机和资料安全保驾护航
  8. python echarts城市热力图_ECharts-热力图实例
  9. 网约车大战重来:易到回归降佣金,美团入局每单补贴超20元
  10. ping内网一台虚拟机延时很大(hyper-v虚拟机)的解决办法