
 1 def statisticWord():
 2     line_number = 0
 3     words_dict = {}
 4     with open (r'D:\test\test.txt',encoding='utf-8') as a_file:
 5         for line in a_file:
 6             words = re.findall(r'&#\d+;|&#\d+;|&\w+;',line)
 7             for word in words:
 8                 words_dict[word] = words_dict.get(word,0) + 1 #get the value of word, default is 0
 9         sort_words_dict = OrderedDict(sorted(words_dict.items(),key = lambda x : x[1], reverse = True))
10 #        sort_words_dict = sorted(words_dict, key = operator.itemgetter(1))
11         with open(r'D:\test\output.txt',encoding = 'utf-8', mode='w') as b_file:
12             for k,v in sort_words_dict.items():
13                 b_file.write("%-15s:%15s" % (k,v))
14                 b_file.write('\n')

# 2. Via command-line arguments

def statisticWord2():
    """Print how often each word occurs in the files named on the command line.

    Reads the file names from ``sys.argv[1:]``. Each line is split on
    whitespace, and every piece has leading and trailing whitespace,
    punctuation and digit characters stripped. Pieces shorter than two
    characters after stripping are ignored. The remaining words are counted
    case-sensitively across all files and printed in sorted order, one
    ``'<word>' occurs <n> times`` line per word.

    If no argument is given, or the first argument is ``-h`` or ``--help``,
    a usage line is printed and the process exits via ``sys.exit()``.

    Files are opened in text mode with the platform default encoding.

    Raises:
        SystemExit: When only the usage message is shown.
        OSError: If one of the named files cannot be opened.
    """
    # Local import: Counter is new to this block, so it is not assumed to be
    # imported at file level.
    from collections import Counter

    if len(sys.argv) == 1 or sys.argv[1] in {"-h", "--help"}:
        print("usage: filename_1 filename_2 ... filename_n")
        sys.exit()

    # Characters removed from both ends of every token. The quote characters
    # the original appended are already part of string.punctuation.
    edge_chars = string.whitespace + string.punctuation + string.digits

    counts = Counter()
    for filename in sys.argv[1:]:
        # ``with`` closes each file promptly; the original left every handle
        # open.
        with open(filename) as text_file:
            for line in text_file:
                for token in line.split():
                    # strip() removes any run of edge_chars from the prefix
                    # and suffix, not from the middle of the word.
                    word = token.strip(edge_chars)
                    if len(word) >= 2:
                        counts[word] += 1

    for word in sorted(counts):
        print("'{0}' occurs {1} times".format(word, counts[word]))


