python源码解读之 string.py

python官网说明文档地址: https://docs.python.org/3.6/library/string.html#module-string

git源码代码地址：https://github.com/python/cpython/blob/3.6/Lib/string.py

首先进入交互式命令行中，可以发现我现在用的 python版本是3.6.4
导入 string 模块（import string，导入时模块名不带 .py 后缀）
dir(string) 可以看到string模块中有的一些属性，方法，还有类

按正常的分析来看:

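以双下划线开头并以双下划线结尾的（如 __all__、__doc__、__name__）一般是模块自带的特殊属性；以首字母大写开头的一般是该模块下面定义的类（如 Formatter、Template）
以单下划线开头的，一般是该模块内部使用的私有名称（私有类、私有函数，或以别名导入的模块，如 _re、_string），供模块内对外公开的类及方法调用
以小写字母开头的一般就是该模块向外提供的、可以直接使用的属性及函数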

源码部分一：

# Public API of the module: the names exported by ``from string import *``.
__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords",
           "digits", "hexdigits", "octdigits", "printable", "punctuation",
           "whitespace", "Formatter", "Template"]

# Helper module whose formatter_parser / formatter_field_name_split functions
# are used by the Formatter class further down.
import _string

# Some strings for ctype-style character classification
# Space, tab, newline, carriage return, vertical tab and form feed.
whitespace = ' \t\n\r\v\f'
# The 26 lowercase ASCII letters.
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
# The 26 uppercase ASCII letters.
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Lowercase letters followed by uppercase letters.
ascii_letters = ascii_lowercase + ascii_uppercase
# Decimal digits.
digits = '0123456789'
# Hexadecimal digits: the decimal digits plus a-f and A-F.
hexdigits = digits + 'abcdef' + 'ABCDEF'
# Octal digits.
octdigits = '01234567'
# ASCII punctuation characters (raw string, so the backslash is literal).
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
# Concatenation of digits, letters, punctuation and whitespace.
printable = digits + ascii_letters + punctuation + whitespace

使用示例代码：

import string

# The nine attributes printed below are hard-coded constants in string.py.
# They save you from typing the character sets by hand: pick whichever one
# you need. Reading the module source tells you exactly what each prints.
for constant_name in (
    'ascii_letters',    # 1. lowercase followed by uppercase ASCII letters
    'ascii_lowercase',  # 2. the 26 lowercase ASCII letters
    'ascii_uppercase',  # 3. the 26 uppercase ASCII letters
    'digits',           # 4. the ten decimal digits
    'hexdigits',        # 5. hexadecimal digits
    'octdigits',        # 6. octal digits
    'punctuation',      # 7. ASCII punctuation characters
    'whitespace',       # 8. whitespace (non-visible) characters
    'printable',        # 9. digits + letters + punctuation + whitespace
):
    print(getattr(string, constant_name))

源码部分二：

def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    # ``sep or ' '``: when sep is None the words are re-joined with one space.
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))

去掉说明字符串后,其实就是一行代码的事：

def capwords(s, sep=None):
    # Still a single expression: split on sep, capitalize every piece, then
    # re-join with sep (or with one space when sep is None/falsy).
    return (sep if sep else ' ').join([word.capitalize() for word in s.split(sep)])

使用案例 :

import string

s = 'aBc,ABc,aBC'
sep = ','  # separator

# With an explicit separator: the input is split on sep, each piece gets its
# first letter upper-cased (the rest lower-cased), and the pieces are joined
# back together with sep.
joined_with_sep = string.capwords(s, sep)
print(joined_with_sep)  # Output: Abc,Abc,Abc

# =========== sep omitted: it defaults to None ============
# The text is then split on whitespace. This input contains none, so the
# whole string is treated as one word and capitalized as a unit.
joined_default = string.capwords(s)
print(joined_default)  # Output: Abc,abc,abc

# Both results follow from this single line of the source:
#   (sep or ' ').join(x.capitalize() for x in s.split(sep))
1、s.split(sep) 按 sep 切分字符串并返回子串列表；sep 为 None（默认）时按连续的空白字符切分，并忽略首尾空白
2、x.capitalize() 将每个子串的首字母大写、其余字母小写
3、(sep or ' ').join() 用 sep 把各子串重新连接起来；sep 为 None 时改用单个空格 ' ' 连接

小结：要把字符串里的每个单词都转换为首字母大写，常见方法有三种，如下：

import string

s = 'hello world'

# Approach 1: string.capwords
print(string.capwords(s))  # Hello World

# Approach 2: the same logic written out by hand
print(' '.join([x.capitalize() for x in s.split()]))  # Hello World

# Approach 3: str.title()
print(s.title())  # Hello World

# Drawback of approach 3: title() treats every run of letters as a word, so
# the letter after an apostrophe is upper-cased as well.
print("they're bill's friends from the USA".title())
# They'Re Bill'S Friends From The Usa
#
# Note: all three approaches lower-case the remaining letters of each word,
# which is why "USA" comes out as "Usa".

字符串对象所有的方法用例说明：https://segmentfault.com/a/1190000004598007

源码部分三：

主要是构建了: Formatter 和 Template 两个类
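Formatter类作用：实现与内置 str.format() 相同的格式化逻辑，并允许通过继承重写 parse、get_field、format_field 等方法来自定义格式化行为；具体使用案例参考本文开头给出的官方说明文档。
Template类作用：官方给出的说明是 "A string class for supporting $-substitutions"，即支持 $name / ${name} 形式占位符替换的字符串模板类。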

而为了写这两个类, 用到了：
1、re 正则模块的下面这四个部分
re.IGNORECASE # 忽视字母大小写, https://www.cnblogs.com/baxianhua/p/8515680.html

re.VERBOSE: 可以将正则表达式分多行写出，每一行后面写上注释，这样使复杂正则表达式具有可读性。
https://www.jianshu.com/p/e027fd294c44

https://www.cnblogs.com/lirongyang/p/9588668.html
re.escape(cls.delimiter) 对分隔符中的正则特殊字符进行转义，使其在匹配规则中按字面含义匹配（例如 '$' 会被转义为 '\$'）
https://www.cnblogs.com/xiaojinniu425/p/7615068.html

re.compile(pattern, cls.flags | _re.VERBOSE) # 把 pattern 字符串编译成正则表达式对象，供后面的 sub() 使用
2、 collections 模块中的ChainMap类：把多个字典组合成一个可以统一查找的视图，查找时按传入顺序依次搜索（这里是 _ChainMap(kws, args[0])，所以关键字参数优先于传入的 mapping）
https://www.cnblogs.com/BeautifulWorld/p/11712684.html

####################################################################
import re as _re
from collections import ChainMap as _ChainMap


class _TemplateMetaclass(type):
    """Metaclass that compiles the ``pattern`` attribute of Template classes.

    When a class is created, its ``pattern`` is replaced by a compiled regular
    expression: either the class's own ``pattern`` (if it defines one) or the
    default below, filled in with the class's ``delimiter`` and ``idpattern``.
    """

    # Default pattern. It is compiled with re.VERBOSE, so the whitespace and
    # the trailing "#" comments inside the string are ignored by the regex
    # engine. Each alternative therefore has to sit on its own line.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        """Compile ``cls.pattern`` using ``cls.flags | re.VERBOSE``."""
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class body supplied its own pattern: use it as-is.
            pattern = cls.pattern
        else:
            # Build the default pattern; the delimiter is escaped so that it
            # is matched literally.
            pattern = _TemplateMetaclass.pattern % {
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                }
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions."""

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE,
    # but without ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use local -i flag and [a-zA-Z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?-i:[_a-zA-Z][_a-zA-Z0-9]*)'
    flags = _re.IGNORECASE

    def __init__(self, template):
        """Store the template string that later substitutions operate on."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of an invalid placeholder.

        ``mo`` is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset minus the length of all preceding full lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Accepts at most one positional mapping plus keyword arguments; when
        both are given, keyword arguments take precedence.

        Raises:
            TypeError: no ``self`` or more than one positional argument.
            KeyError: a placeholder has no value in the mapping (propagated
                from the mapping lookup).
            ValueError: the template contains an invalid placeholder.
        """
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            # kws comes first, so it is searched before the positional mapping.
            mapping = _ChainMap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolved placeholders untouched.

        Placeholders missing from the mapping and invalid placeholders are
        returned unchanged instead of raising KeyError / ValueError.

        Raises:
            TypeError: no ``self`` or more than one positional argument.
        """
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            # kws comes first, so it is searched before the positional mapping.
            mapping = _ChainMap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Unknown name: keep the original placeholder text.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)


########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split

class Formatter:
    """Format-string engine whose individual steps can be overridden.

    format() / vformat() drive the process; parse, get_field, get_value,
    convert_field, format_field and check_unused_args are the hooks that the
    driver calls.
    """

    def format(*args, **kwargs):
        """Format ``format_string`` with the remaining args and kwargs.

        The first positional argument after ``self`` is the format string.
        Passing it as the keyword ``format_string`` still works but emits a
        DeprecationWarning.

        Raises:
            TypeError: ``self`` or the format string is missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        self, *args = args  # allow the "self" keyword be passed
        try:
            format_string, *args = args # allow the "format_string" keyword be passed
        except ValueError:
            if 'format_string' in kwargs:
                format_string = kwargs.pop('format_string')
                import warnings
                warnings.warn("Passing 'format_string' as keyword argument is "
                              "deprecated", DeprecationWarning, stacklevel=2)
            else:
                raise TypeError("format() missing 1 required positional "
                                "argument: 'format_string'") from None
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format using an explicit args sequence and kwargs mapping.

        Calls check_unused_args() with the set of arguments that were used.
        """
        used_args = set()
        # 2 is the recursion budget for nested replacement fields.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Recursive worker behind vformat().

        Returns a ``(formatted_text, auto_arg_index)`` tuple so the automatic
        numbering state survives the recursion into format specs.
        ``auto_arg_index`` is an int while automatic numbering is possible and
        False once a manually numbered field has been seen.

        Raises:
            ValueError: recursion budget exhausted, or automatic and manual
                field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # handle arg indexing when empty field_names are given.
                if field_name == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    field_name = str(auto_arg_index)
                    auto_arg_index += 1
                elif field_name.isdigit():
                    if auto_arg_index:
                        # Automatic numbering is already in use and a manual
                        # index appears now. The text here used to repeat the
                        # manual->automatic message above, which describes
                        # the opposite switch.
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return ``args[key]`` for an int key, otherwise ``kwargs[key]``."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format a single value via the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion: None (no-op), 's', 'r' or 'a'.

        Raises:
            ValueError: any other conversion specifier.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split ``format_string`` into tuples (see the comment above)."""
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve ``field_name`` and return ``(object, first_key)``.

        ``first_key`` is the leading argument index or name that the lookup
        started from.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first