

import string
# string.capwords() upper-cases the first letter of every word in a string.
s = "when i was young, i'd listen to the radio"
print(s)  # when i was young, i'd listen to the radio
print(string.capwords(s, sep=" "))  # When I Was Young, I'd Listen To The Radio
# The call is equivalent to splitting the string, capitalizing each word
# (str.capitalize) and joining the pieces back together. `sep` is optional;
# when it is omitted the words are split on whitespace.
# Baseline: classic printf-style interpolation with the % operator.
values = dict(var="foo")
s = "var: %(var)s, escape: %%, %(var)sxxx"
print("result ->", s % values)  # result -> var: foo, escape: %, fooxxx
# Note the "escape: %%" part: to make the trigger character lose its special
# meaning and be treated as a plain character, it has to be doubled.

# The same thing with a template. The last placeholder is written ${var}
# because it is glued to the text that follows it.
s = "var: $var, escape: $$, ${var}xxx"
# Step 1: build the template object that can be rendered.
t = string.Template(s)
# Step 2: substitute; the return value is the rendered string.
for rendered in (t.substitute(values), t.substitute(**values)):
    print(rendered)  # var: foo, escape: $, fooxxx
# Every $var became "foo". Passing the dict itself and passing it unpacked as
# keyword arguments (var="foo") give the same result.
# Unlike the template engines shipped with flask, tornado and friends,
# string.Template supports neither logic nor data-structure access: a dict
# handed to jinja2 can be indexed inside the template, whereas
# string.Template only performs plain text replacement.
# Let's look at jinja2 next.
import jinja2

# A jinja2 template may call methods on the objects it receives.
s = "{{var}}--{{dic.get('key', 'value')}}--{{dic.get('KEY', 'value')}}"
t = jinja2.Template(s)
context = {"var": "foo", "dic": {"key": "mashiro"}}
print(t.render(**context))  # foo--mashiro--value
# jinja2 supports dict lookups inside the template; `dic` has no "KEY" entry,
# so the default value is used there. string.Template cannot do this.
# That is why, for string.Template, passing a dict and passing keyword
# arguments are the same thing: the dict is simply unpacked and its key/value
# pairs are used for the replacement.
# String templates also offer a "safe" variant, shown next.
s = "$var--$missing"
t = string.Template(s)
# No value is supplied for `missing`: substitute() would raise, whereas
# safe_substitute() leaves the placeholder alone and only replaces what it
# was given. And what if an extra value is passed, one the template never
# mentions (there is no $xxx in `s`)?
supplied = {"var": "foo", "xxx": "xxxxx"}
print(t.safe_substitute(**supplied))  # foo--$missing
# No problem at all. To summarize:
# - placeholder defined in the template but no value supplied:
#   substitute() raises, safe_substitute() does not;
# - value supplied but no such placeholder in the template (e.g. $xxx):
#   neither method raises, the extra value is silently ignored.









import string

# The string module ships a set of constants describing the ASCII and numeric
# character sets. All of them are plain module attributes (string.<name>):
#
#   whitespace      = ' \t\n\r\v\f'
#   ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
#   ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
#   ascii_letters   = ascii_lowercase + ascii_uppercase
#   digits          = '0123456789'
#   hexdigits       = digits + 'abcdef' + 'ABCDEF'
#   octdigits       = '01234567'
#   punctuation     = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
#   printable       = digits + ascii_letters + punctuation + whitespace
import textwrap

# Sample paragraph shared by all textwrap demos below. Every line is indented
# on purpose so that the effect of dedent()/fill() is visible.
# NOTE(review): the line breaks are restored from the recorded dedent()/indent()
# output; the four-space indent width is an assumption.
text = '''
    There are moments in life when you miss someone
    so much that you just want to pick them
    from your dreams and hug them for real! Dream what
    you want to dream;go where you want to go;
    be what you want to be,because you have
    only one life and one chance to do all the things you want to do.
'''

# fill() takes text as input and returns it re-wrapped to the given width.
print(textwrap.fill(text, width=50))
# The output starts with:
#      There are moments in life when you miss
# someone     so much that you just want to pick
# Not very satisfying: the text is wrapped, but only the first line keeps its
# indentation, while the indentation of the following source lines ends up
# embedded in the middle of the paragraph as runs of spaces.

# dedent() strips the whitespace prefix common to all lines, which gives a
# much better starting point. It lets you write docstrings or multi-line
# literals indented along with the surrounding Python code and still remove
# that code-level formatting from the text itself.
dedent_text = textwrap.dedent(text).strip()
print(dedent_text)
# There are moments in life when you miss someone
# so much that you just want to pick them
# from your dreams and hug them for real! Dream what
# you want to dream;go where you want to go;
# be what you want to be,because you have
# only one life and one chance to do all the things you want to do.
# i.e. dedent() removes the leading indentation of every line.

# Dedent first, then fill: the embedded runs of spaces are gone.
print(textwrap.fill(dedent_text, width=60))

# indent() adds the same prefix to every line of a string.
final_text = textwrap.indent(dedent_text, ">>>")
print(final_text)
# >>>There are moments in life when you miss someone
# >>>so much that you just want to pick them
# >>>from your dreams and hug them for real! Dream what
# >>>you want to dream;go where you want to go;
# >>>be what you want to be,because you have
# >>>only one life and one chance to do all the things you want to do.

# The prefix can also be restricted to selected lines via `predicate`.
final_text = textwrap.indent(
    dedent_text,
    prefix="->",
    predicate=lambda line: len(line.strip()) > 40,
)
print(final_text)
# ->There are moments in life when you miss someone
# so much that you just want to pick them
# ->from your dreams and hug them for real! Dream what
# ->you want to dream;go where you want to go;
# be what you want to be,because you have
# ->only one life and one chance to do all the things you want to do.
# The lambda receives each line in turn; here only lines longer than 40
# characters (after stripping) get the prefix.

# Besides the width, the indentation of the first line can be controlled
# independently from that of the following lines.
print(textwrap.fill(dedent_text, initial_indent="", subsequent_indent=" " * 4, width=50))
# This yields a hanging indent: the first line is indented less than the rest.

# The indent strings may contain non-whitespace characters as well.
print(textwrap.fill(dedent_text, initial_indent="", subsequent_indent="*" * 4, width=50))

# shorten() collapses whitespace and truncates the text to fit the width,
# appending a placeholder.
print(textwrap.shorten(dedent_text, width=50))  # There are moments in life when you miss [...]
import re

# re.search() scans the whole string and returns the first match.
pattern = "this"
text = "你知道this的含义吗?"
match = re.search(pattern, text)
print(match)  # <re.Match object; span=(3, 7), match='this'>
print(match.start())  # 3
print(match.end())  # 7
print(text[match.start(): match.end()])  # this

# That was search(); what about match()?
match = re.match(pattern, text)
print(match)  # None
# The result is None: match() resembles search(), but it only matches at the
# beginning of the string, whereas search() may match anywhere.

# Even when several occurrences qualify, only the first one is returned.
print(re.search("this", "123this456this789this"))  # <re.Match object; span=(3, 7), match='this'>

# When nothing matches, None is returned, and calling start()/end() on it
# raises AttributeError: 'NoneType' object has no attribute '...'.
# So guard the access with a check.
match = re.match(pattern, text)
if match is not None:
    print(match.start())



import re

number_list = ["13541258742", "18845214415", "13512441552"]

# Using the module-level function: the pattern is passed on every call.
for number in number_list:
    if re.match("135", number):
        print(number)

# Compile the pattern once to obtain a pattern object ...
comp = re.compile("135")
# ... and then search with that object directly; only the text is passed.
for number in number_list:
    if comp.match(number):
        print(number)
# Either loop prints:
# 13541258742
# 13512441552

# In other words:
#   re.match(pattern, text)  <==>  re.compile(pattern).match(text)
# A good analogy is calling a method through the class versus the instance.
s = "abc|abcd|efg"
print(s.split("|"))  # ['abc', 'abcd', 'efg']
# Calling through the class `str` works too, but is less convenient, so we
# normally call the method on the string itself (an instance of str).
print(str.split(s, "|"))  # ['abc', 'abcd', 'efg']
# Summary: either call re.match with both the pattern and the text, or compile
# the pattern and call match() on the compiled object, passing only the text.



import re

pattern = "abc"
text = "abc|abc|abc|abc"
# findall() returns every match as a list of strings. Even when only one
# occurrence matches, the result is still a (one-element) list.
print(re.findall(pattern, text))  # ['abc', 'abc', 'abc', 'abc']

# There is also an iterator flavour.
res = re.finditer(pattern, text)
print(res)  # <callable_iterator object at 0x...>
for v in res:
    print(v)
# <re.Match object; span=(0, 3), match='abc'>
# <re.Match object; span=(4, 7), match='abc'>
# <re.Match object; span=(8, 11), match='abc'>
# <re.Match object; span=(12, 15), match='abc'>
# Each item produced by finditer() is a Match object.
import re

# --- Repetition ---
# {m,n}: the preceding item occurs between m and n times.
# {m,}:  with n omitted, at least m times and no upper limit.
# Hence:  * == {0,}    + == {1,}    ? == {0,1}

# "b" occurs one to three times. Note that this is NOT "ab" one to three
# times: * + ? {} only apply to the single character right before them.
print(re.search("ab{1,3}", "abcabbc"))  # <re.Match object; span=(0, 2), match='ab'>
print(re.search("ab{2,3}", "abcabbc"))  # <re.Match object; span=(3, 6), match='abb'>

# Below, one to three b's are allowed, so ab, abb and abbb all qualify, yet
# the result is abbb. Regular expressions are greedy by default: they match
# as much as they can.
print(re.search("ab{1,3}", "abbbbb"))  # <re.Match object; span=(0, 4), match='abbb'>
# The same holds for the metacharacters * + ?
print(re.search("ab*", "abbbbb"))  # <re.Match object; span=(0, 6), match='abbbbb'>
print(re.search("ab+", "abbbbb"))  # <re.Match object; span=(0, 6), match='abbbbb'>
print(re.search("ab?", "abbbbb"))  # <re.Match object; span=(0, 2), match='ab'>

# How to turn greediness off? Append ? after * + ? {}.
# One to three times, non-greedy: takes a single b.
print(re.search("ab{1,3}?", "abbbbb"))  # <re.Match object; span=(0, 2), match='ab'>
# * means zero or more; non-greedy takes zero.
print(re.search("ab*?", "abbbbb"))  # <re.Match object; span=(0, 1), match='a'>
# + non-greedy takes exactly one.
print(re.search("ab+?", "abbbbb"))  # <re.Match object; span=(0, 2), match='ab'>
# ? means zero or one; non-greedy takes zero.
print(re.search("ab??", "abbbbb"))  # <re.Match object; span=(0, 1), match='a'>

# . matches any character except a newline.
print(re.search(".{1,5}", "aaaaa"))  # <re.Match object; span=(0, 5), match='aaaaa'>
print(re.search(".{1,5}", "aa\naa"))  # <re.Match object; span=(0, 2), match='aa'>

# --- Character sets ---
print(re.findall("a[love]c", "awc|aec|afc|akc|adc"))  # ['aec']
# The set [love] behaves as a single unit and can be combined with
# quantifiers: [love]{1,3} matches one to three characters, each of which is
# one of l, o, v, e.
# v is in [love]: matches vvv.
print(re.findall("[love]{1,3}", "vvv"))  # ['vvv']
# k is not in the set but v is: matches vv.
print(re.findall("[love]{1,3}", "kvv"))  # ['vv']
# Matches v.
print(re.findall("[love]{1,3}", "kv"))  # ['v']
# a is not in the set but v and e are: matches ve.
print(re.findall("[love]{1,3}", "ave"))  # ['ve']
# findall() finds every match: l, o and v are all in the set, but three is the
# maximum, so the first match ends at "lov". Scanning continues: e is in the
# set but _ is not, giving "e". y is not in the set, o is, u is not, giving
# "o". The final result is ['lov', 'e', 'o'].
print(re.findall("[love]{1,3}", "love_you"))  # ['lov', 'e', 'o']

# Negation: [^love] matches any character other than l, o, v, e.
# Ranges save you from spelling out all 26 letters:
# [a-z]        all lowercase letters
# [A-Z]        all uppercase letters
# [0-9]        all digits
# [a-zA-Z0-9]  all letters and digits
print(re.findall("[a-z][A-Z]", "aB|ac"))  # ['aB']
print(re.findall("[a-zA-Z]", "a|C|4|尻"))  # ['a', 'C']
print(re.findall("[a-zA-Z0-9]", "a|C|4|尻"))  # ['a', 'C', '4']

# --- Escape codes ---
# \d  a digit
# \D  a non-digit
# \w  a letter or digit (plus underscore); for str patterns this includes
#     Chinese characters
# \W  anything \w does not match
# \s  whitespace (tab, space, newline, ...)
# \S  non-whitespace
# In a normal (non-raw) Python string literal the backslash is itself an
# escape character, so it has to be doubled: the literal "\\d" becomes the two
# characters \d, which the regex engine then interprets as "a digit".
print(re.search("\\d{1,3}", "1234"))  # <re.Match object; span=(0, 3), match='123'>
# And to match a literal backslash?
print(re.search("\\\\hello", "\\hello"))  # <re.Match object; span=(0, 6), match='\\hello'>
# With a raw string (r"...") a single backslash is enough. The example also
# shows that \w matches Chinese characters.
print(re.search(r"\w{1,2}", "古明地觉"))  # <re.Match object; span=(0, 2), match='古明'>

# --- Anchors ---
# ^ anchors at the start of the string, $ at the end.
print(re.search(r"^abc", "abcd"))  # <re.Match object; span=(0, 3), match='abc'>
print(re.search(r"^abc", "aabcd"))  # None
print(re.search(r"abc$", "dabc"))  # <re.Match object; span=(1, 4), match='abc'>
print(re.search(r"abc$", "abcd"))  # None



import re

# A compiled pattern's search() accepts pos/endpos to restrict the region of
# the text that is scanned.
pattern = re.compile(r"123")
text = "123|123|123"
print(pattern.search(text, pos=2, endpos=7))  # <re.Match object; span=(4, 7), match='123'>
# The scan starts at index 2 and stops at index 7.
import re

# --- Groups ---
# There are two "abc" here, but we want the one sandwiched between xxx and xxx.
match = re.search(r"xxx(abc)xxx", "abcxxxabcxxx")
print(match)  # <re.Match object; span=(3, 12), match='xxxabcxxx'>
# The match still covers the whole pattern; group() extracts what the
# parentheses captured.
print(match.group(0))  # xxxabcxxx
print(match.group(1))  # abc

match = re.search(r"123(.+?)456(.+?)789", "123abc456def789")
print(match.group(0))  # 123abc456def789
print(match.group(1))  # abc
print(match.group(2))  # def

# And what about nested groups? They are numbered by their opening parenthesis.
match = re.search(r"123((.+?)456(.+?))789", "123abc456def789")
print(match.group(0))  # 123abc456def789
print(match.group(1))  # abc456def
print(match.group(2))  # abc
print(match.group(3))  # def

# Groups can be named by writing ?P<name> right after the opening
# parenthesis, where name is the group name of our choice.
match = re.search(r"123(?P<yoyoyo>.+?)456(?P<哈哈哈>.+?)789", "123纳尼456我屮艸芔茻789")
print(match.group("yoyoyo"), match.group("哈哈哈"))  # 纳尼 我屮艸芔茻
print(match.group(1), match.group(2))  # 纳尼 我屮艸芔茻
# Accessing named groups by number still works.

# Besides group() there are also groups() and groupdict().
match = re.search(r"123(?P<yoyoyo>.+?)456(?P<哈哈哈>.+?)789", "123纳尼456我屮艸芔茻789")
print(match.groups())  # ('纳尼', '我屮艸芔茻')
print(match.groupdict())  # {'yoyoyo': '纳尼', '哈哈哈': '我屮艸芔茻'}

# groups() returns every group, named or not, while groupdict() only returns
# the named ones. For example:
match = re.search(r"123(?P<yoyoyo>.+?)456(.+?)789", "123纳尼456我屮艸芔茻789")
print(match.groups())  # ('纳尼', '我屮艸芔茻')
# The unnamed group is missing from groupdict().
print(match.groupdict())  # {'yoyoyo': '纳尼'}

# Inside a pattern the pipe | means "or".
# jpg|png matches jpg or png. Note that the pipe applies to everything on
# both of its sides:
#   www.mashiro.jpg|png    means  www.mashiro.jpg  or  png
#   www.mashiro.(jpg|png)  limits the pipe to the parenthesized part, meaning
#                          www.mashiro.jpg  or  www.mashiro.png
print(re.search(r"www\.mashiro\.(jpg|png)", "www.mashiro.jpg"))  # <re.Match object; span=(0, 15), match='www.mashiro.jpg'>
# \. is an escape: it makes . a plain dot instead of the "any character" dot.

# But this raises another issue. Consider:
text = "这里有图片哦www.1.jpg,有很多格式的哦www.2.png,想看吗,还会动哦,www.3.gif那还等什么呢?www.banana.jpg,快去吧,那个象征自由的男人在等着你www.象征自由的男人--尻比.png"
res = re.findall(r"www.+?(jpg|png|gif)", text)
print(res)  # ['jpg', 'png', 'gif', 'jpg', 'png']
# When the pattern contains a group, findall() returns the group's content
# rather than the whole match. Wrapping the whole pattern in another group
# captures the full link too:
res = re.findall(r"(www.+?(jpg|png|gif))", text)
print(res)  # [('www.1.jpg', 'jpg'), ('www.2.png', 'png'), ('www.3.gif', 'gif'), ('www.banana.jpg', 'jpg'), ('www.象征自由的男人--尻比.png', 'png')]
# The outer group is captured as well, and the contents of the groups are
# combined into a tuple, so the link can be fetched by index. Still not ideal.
# Can the inner parentheses be stopped from acting as a group, given that
# they are only there to allow several extensions? Yes: start the group with
# ?: to make it non-capturing.
res = re.findall(r"www.+?(?:jpg|png|gif)", text)
print(res)  # ['www.1.jpg', 'www.2.png', 'www.3.gif', 'www.banana.jpg', 'www.象征自由的男人--尻比.png']
# It works, and the outer parentheses are no longer needed: with no capturing
# group left, findall() returns the whole match.

# One last remark on www.+?(?:jpg|png|gif): why .+? and not simply .+ ?
print(re.search(r"www.+?jpg", "www.1.jpg我屮艸芔茻www.2.jpg").group())  # www.1.jpg
# Dropping the ? switches back to greedy matching.
print(re.search(r"www.+jpg", "www.1.jpg我屮艸芔茻www.2.jpg").group())  # www.1.jpg我屮艸芔茻www.2.jpg
# The greedy version runs from the first www to the last jpg, so remember to
# use the non-greedy form here.



import re

# For reference, the signature is: re.search(pattern, string, flags=0)
# `flags` changes how the engine processes the expression.

# --- Case-insensitive matching ---
# re.IGNORECASE turns off case sensitivity; it can also be spelled re.I.
match = re.match(r"aa", "aA", flags=re.IGNORECASE)
print(match)  # <re.Match object; span=(0, 2), match='aA'>

# --- Multi-line input ---
# By default . does not match a newline; re.DOTALL makes it do so.
match = re.match(r".+", "aabb\ncc")
print(match)  # <re.Match object; span=(0, 4), match='aabb'>
match = re.match(r".+", "aabb\ncc", flags=re.DOTALL)
print(match)  # <re.Match object; span=(0, 7), match='aabb\ncc'>

# --- Unicode ---
# \w matches Unicode word characters unless re.ASCII restricts it to ASCII.
print(re.match(r"\w+", "love中国"))  # <re.Match object; span=(0, 6), match='love中国'>
print(re.match(r"\w+", "love中国", flags=re.ASCII))  # <re.Match object; span=(0, 4), match='love'>



import re

text = "when i was young, i'd listen to the radio"
# Goal: replace every "i" with an upper-case "I".
# Use re.sub(pattern, repl, string, count=0, flags=0).
# Arguments: pattern to replace, replacement text, input text, maximum number
# of replacements (0, the default, means all), matching flags.
print(re.sub(r"i", "I", text))  # when I was young, I'd lIsten to the radIo

# Goal: delete all the English text.
text = "古明地觉(komeiji satori)是一个来自于东方地灵殿的女孩,它有一个妹妹,叫古明地恋(komeiji koishi)"
# Replacing \w with the empty string does it, but \w also matches Chinese
# (and Japanese, etc.) by default, so pass flags=re.A to restrict it to ASCII.
print(re.sub(r"\w", "", text, flags=re.A))  # 古明地觉( )是一个来自于东方地灵殿的女孩,它有一个妹妹,叫古明地恋( )
# Without the flag the Chinese characters are stripped as well.
print(re.sub(r"\w", "", text))  # ( ),,( )

# subn() works like sub() but returns the number of replacements made in
# addition to the new string.
print(re.subn("a", "b", "accaccacc"))  # ('bccbccbcc', 3)
import re

text = "abc1def55455ghi6621"
# Goal: split on digits and keep only abc, def and ghi.
# re.split() does the job; a maximum number of splits can be given, the
# default being "all". Here every single digit acts as a separator, so runs of
# consecutive digits produce empty strings in the result.
print(re.split(r"\d", text))  # ['abc', 'def', '', '', '', '', 'ghi', '', '', '', '']
# Drop the empty strings afterwards.
no_space = [piece for piece in re.split(r"\d", text) if piece]
print(no_space)  # ['abc', 'def', 'ghi']



import difflib

text1 = '''Half the people on our streets look as though life was a sorry business.
It is hard to find a happy looking man or woman. xxx
Worry is the cause of their woebegone appearance.
Worry makes the wrinkles; worry cuts the deep, down-glancing lines on the face;
worry is the worst disease of our modern times.
'''
text2 = '''Half the people on our streets look as though life was a sorry business.
It is hard to find a happy looking man or woman.
Worry is the cause of their woebegone appearance.
Worry makes the wrinkles; worry cuts the deep, down-glancing lines on the face;
worry is the worst disease of our modern timeS.
xxx xxx  xxx  xxx
'''
# Split the text into sequences of single lines; compared with passing in big
# strings this gives far more readable output.
text1_lines = text1.splitlines()
text2_lines = text2.splitlines()
d = difflib.Differ()
diff = d.compare(text1_lines, text2_lines)
# `diff` is a generator; printing consumes it.
for line in diff:
    print(line)
# The output has this shape ("?" guide lines abbreviated):
#   Half the people on our streets look as though life was a sorry business.
# - It is hard to find a happy looking man or woman. xxx
# ? <guide line with "-" under the removed characters>
# + It is hard to find a happy looking man or woman.
#   Worry is the cause of their woebegone appearance.
#   Worry makes the wrinkles; worry cuts the deep, down-glancing lines on the face;
# - worry is the worst disease of our modern times.
# ? <guide line with "^" under the changed character>
# + worry is the worst disease of our modern timeS.
# ? <guide line with "^" under the changed character>
# + xxx xxx  xxx  xxx
#
# The first line is identical in both texts and is printed as is.
# In the second line the first sequence has an extra " xxx" at the end; the
# line starting with "?" is a guide line emitted solely to mark where the
# change is.
# When lines differ, the line from each sequence is printed, so the same
# sentence appears again with the prefix "+", meaning "second sequence".
# The next two lines are identical and are printed normally.
# Then "times" was changed to "timeS", which is reported as a difference.
# Finally the extra last line "xxx xxx  xxx  xxx" is shown as present only in
# the second sequence.



from difflib import SequenceMatcher

text1 = "abcd"
text2 = "bcde"
s1 = SequenceMatcher(None, text1, text2)
# Find the longest matching block. The four arguments are the start and end
# positions to search in text1, then the start and end positions in text2.
match = s1.find_longest_match(0, len(text1), 0, len(text2))
print(match)  # Match(a=1, b=0, size=3)
print(match.a, match.b, match.size)  # 1 0 3
print(text1[match.a: match.a+match.size])  # bcd
print(text2[match.b: match.b+match.size])  # bcd

text1 = "when i was young, i would listen to the radio"
text2 = "when i was old, i will listen to the radio"
s2 = SequenceMatcher(None, text1, text2)
match = s2.find_longest_match(0, len(text1), 0, len(text2))
print(match)  # Match(a=25, b=22, size=20)

text1 = "abcdefkaaa"
text2 = "abcdefkaaa"
# The first argument may be a function flagging "junk" elements. Here "k" is
# junk, so the match stops there (the adjacent "k" itself is still included).
# Without it the two identical sequences would match in full.
s2 = SequenceMatcher(lambda x: x == "k", text1, text2)
match = s2.find_longest_match(0, len(text1), 0, len(text2))
print(match)  # Match(a=0, b=0, size=7)

# The similarity of two strings can be computed as well.
text1 = "我的梦想是开飞机"
text2 = "我的梦想是开拖拉机"
s3 = SequenceMatcher(None, text1, text2)
print("%.2f" % s3.ratio())  # 0.82
# This is somewhat similar to the third-party library fuzzywuzzy.
from fuzzywuzzy import fuzz

# fuzz.ratio() gives an integer similarity score for two strings.
p = fuzz.ratio("我的梦想是开飞机", "我的梦想是开拖拉机")
print(p)  # 82

short_text, long_text = "我有一只猫", "我有一只猫咪!!!"
p2 = fuzz.ratio(short_text, long_text)
print(p2)  # 71
p3 = fuzz.partial_ratio(short_text, long_text)
# partial_ratio() stops comparing once one of the two strings is exhausted,
# which is why the score is 100 here.
print(p3)  # 100




