将 test.py、langconv.py、zh_wiki.py 三个文件放在同一目录下,然后运行 test.py 即可;运行前请先阅读 test.py 中的代码注释。

test.py

#! /usr/bin/env python  
# -*- coding: utf-8 -*-  
#@author xfzhang
#@version 2018/5/22
#路径不要带中文,文件名也不要带中文
  
import os  
import time
import sys  
from langconv import *  
# Source directory; the path must not contain Chinese characters. Replace as needed.
sourceDir = r"D:/1"
# Target directory; the path must not contain Chinese characters. Replace as needed.
targetDir = r"D:/2"
# Module-level accumulator: copyFiles() appends every target file path to it
# and returns it.
file_list = []

def simple2tradition(line):
    """Convert one line from Simplified to Traditional Chinese.

    Args:
        line: Either a GBK-encoded byte string or an already-decoded text
            string.

    Returns:
        The converted line in the same kind as the input: GBK-encoded bytes
        for byte input (bytes that cannot be decoded, or characters that
        cannot be encoded as GBK, are silently dropped), text for text input.
    """
    converter = Converter('zh-hant')
    if isinstance(line, bytes):
        text = line.decode('gbk', 'ignore')
        return converter.convert(text).encode('gbk', 'ignore')
    # Text input (e.g. a line read from a text-mode file on Python 3) has no
    # .decode(); it is already decoded, so convert it directly.
    return converter.convert(line)

def copyFiles(sourceDir, targetDir):
    """Recursively copy sourceDir into targetDir and collect the target paths.

    A file is (re)copied when the target does not exist or its size differs
    from the source; otherwise the existing target is left untouched. Target
    directories are created only when they receive at least one file.

    Args:
        sourceDir: Directory to copy from.
        targetDir: Directory to copy into.

    Returns:
        The module-level ``file_list``, to which the target path of every
        file found under sourceDir has been appended (copied or not).
    """
    for name in os.listdir(sourceDir):
        sourceF = os.path.join(sourceDir, name)
        targetF = os.path.join(targetDir, name)
        if os.path.isfile(sourceF):
            if not os.path.exists(targetDir):
                os.makedirs(targetDir)

            # Missing target, or same name but different size: overwrite.
            if (not os.path.exists(targetF)
                    or os.path.getsize(targetF) != os.path.getsize(sourceF)):
                # Binary copy; the context managers close both handles even
                # when reading or writing fails.
                with open(sourceF, "rb") as src:
                    data = src.read()
                with open(targetF, "wb") as dst:
                    dst.write(data)

            file_list.append(targetF)
        elif os.path.isdir(sourceF):
            copyFiles(sourceF, targetF)
    return file_list
          
if __name__ == "__main__":
    # psyco is optional; carry on without it when it cannot be imported.
    try:
        import psyco
        psyco.profile()
    except ImportError:
        pass
    # Mirror the source tree, then convert every target file in place.
    all_file = copyFiles(sourceDir, targetDir)
    for f in all_file:
        # Binary mode on both sides: simple2tradition decodes and re-encodes
        # the GBK bytes itself, so no text-mode decoding or str() coercion
        # may touch the data (str(bytes) would write "b'...'" on Python 3).
        with open(f, "rb") as one_file:
            another_content = [simple2tradition(one_line)
                               for one_line in one_file.readlines()]
        with open(f, "wb") as one_file:
            one_file.writelines(another_content)

langconv.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from copy import deepcopy
import re

# Optional acceleration: enable psyco when it is available. Every exception
# raised by the import or by psyco.full() is swallowed.
try:
    import psyco
    psyco.full()
except:
    pass

# Load the conversion tables; fall back to the package-qualified module path
# when the plain module name cannot be imported.
try:
    from zh_wiki import zh2Hant, zh2Hans
except ImportError:
    from zhtools.zh_wiki import zh2Hant, zh2Hans

import sys
# True when running on Python 3 or later.
py3k = sys.version_info >= (3, 0, 0)

if py3k:
    # Empty text string used as the initial value of accumulators.
    UEMPTY = ''
else:
    # Python 2: decode every key and value of both tables from UTF-8 so the
    # converter works on unicode objects, then rebind the table names.
    _zh2Hant, _zh2Hans = {}, {}
    for old, new in ((zh2Hant, _zh2Hant), (zh2Hans, _zh2Hans)):
        for k, v in old.items():
            new[k.decode('utf8')] = v.decode('utf8')
    zh2Hant = _zh2Hant
    zh2Hans = _zh2Hans
    UEMPTY = ''.decode('utf8')

# states (values 0..3) a StatesMachine can be in
(START, END, FAIL, WAIT_TAIL) = list(range(4))
# conditions (values 0..4) StatesMachine.feed derives from a looked-up Node
(TAIL, ERROR, MATCHED_SWITCH, UNMATCHED_SWITCH, CONNECTOR) = list(range(5))

# Registry of ConvertMap objects keyed by name; filled by registery() and
# read by Converter.__init__.
MAPS = {}

class Node(object):
    """Result of looking up one candidate word in a conversion map.

    Attributes are plain values:
      from_word   -- the looked-up source text
      to_word     -- the replacement text (from_word itself when none given
                     or when the replacement is empty)
      is_tail     -- flag passed in by the caller (default True)
      have_child  -- flag passed in by the caller (default False)
      is_original -- True when no to_word was supplied (to_word is None)
      data        -- tuple (is_tail, have_child, to_word-or-from_word-as-passed)
    """

    def __init__(self, from_word, to_word=None, is_tail=True,
            have_child=False):
        self.from_word = from_word
        if to_word is None:
            self.to_word = from_word
            self.data = (is_tail, have_child, from_word)
            self.is_original = True
        else:
            # An empty replacement falls back to the source text, but
            # ``data`` keeps the replacement exactly as it was passed in.
            self.to_word = to_word or from_word
            self.data = (is_tail, have_child, to_word)
            self.is_original = False
        self.is_tail = is_tail
        self.have_child = have_child

    def is_original_long_word(self):
        """Return True for an unmapped word longer than one character."""
        return self.is_original and len(self.from_word)>1

    def is_follow(self, chars):
        """Return True when chars differs from from_word minus its last char."""
        return chars != self.from_word[:-1]

    def __str__(self):
        return '<Node, %s, %s, %s, %s>' % (repr(self.from_word),
                repr(self.to_word), self.is_tail, self.have_child)

    __repr__ = __str__

class ConvertMap(object):
    """Prefix-aware lookup table built from a source -> target word mapping.

    Internally every key of the mapping and every proper prefix of every key
    is stored as ``key -> (is_tail, have_child, to_word)``.
    """

    def __init__(self, name, mapping=None):
        """Create the map; build the table immediately when mapping is given."""
        self.name = name
        self._map = {}
        # Defined up front so the attribute exists even without a mapping.
        self.max_key_length = 0
        if mapping:
            self.set_convert_map(mapping)

    def set_convert_map(self, mapping):
        """Rebuild the internal table from mapping (source word -> target word).

        For each stored key the tuple holds: whether the key itself is in
        mapping, whether some longer key starts with it, and the target word
        (UEMPTY for keys that are only prefixes).
        """
        convert_map = {}
        have_child = {}
        max_key_length = 0
        # Sorted order matters: a prefix sorts before the longer keys that
        # start with it, so its "False" is written first and then overwritten
        # with "True" by those longer keys.
        for key in sorted(mapping.keys()):
            if len(key)>1:
                for i in range(1, len(key)):
                    parent_key = key[:i]
                    have_child[parent_key] = True
            have_child[key] = False
            max_key_length = max(max_key_length, len(key))
        for key in sorted(have_child.keys()):
            convert_map[key] = (key in mapping, have_child[key],
                    mapping.get(key, UEMPTY))
        self._map = convert_map
        self.max_key_length = max_key_length

    def __getitem__(self, k):
        """Return the Node for k; unknown keys yield an "original" Node(k)."""
        try:
            is_tail, have_child, to_word = self._map[k]
        except KeyError:
            return Node(k)
        return Node(k, to_word, is_tail, have_child)

    def __contains__(self, k):
        return k in self._map

    def __len__(self):
        return len(self._map)

class StatesMachineException(Exception):
    """Error type for the conversion state machine."""

class StatesMachine(object):
    """One candidate conversion path, advanced one character at a time.

    Attributes:
      state -- one of START, END, FAIL, WAIT_TAIL
      final -- converted text accumulated by this machine
      len   -- number of words this machine has emitted
      pool  -- pending source characters not yet resolved to a word
    """

    def __init__(self):
        self.state = START
        self.final = UEMPTY
        self.len = 0
        self.pool = UEMPTY

    def clone(self, pool):
        """Return a deep copy in WAIT_TAIL state whose pending text is pool."""
        new = deepcopy(self)
        new.state = WAIT_TAIL
        new.pool = pool
        return new

    def feed(self, char, map):
        """Advance the machine by one character.

        Args:
            char: The next source character.
            map: Lookup table; ``map[text]`` must return a Node.

        Returns:
            A newly branched machine (see clone) when the looked-up word may
            still grow into a longer match, otherwise None.

        Raises:
            StatesMachineException: if the machine is already in FAIL state
                and the lookup is neither an ERROR nor a TAIL condition.
        """
        node = map[self.pool+char]

        # Classify the lookup result.
        if node.have_child:
            if node.is_tail:
                if node.is_original:
                    cond = UNMATCHED_SWITCH
                else:
                    cond = MATCHED_SWITCH
            else:
                cond = CONNECTOR
        else:
            if node.is_tail:
                cond = TAIL
            else:
                cond = ERROR

        new = None
        if cond == ERROR:
            self.state = FAIL
        elif cond == TAIL:
            if self.state == WAIT_TAIL and node.is_original_long_word():
                self.state = FAIL
            else:
                self.final += node.to_word
                self.len += 1
                self.pool = UEMPTY
                self.state = END
        elif self.state == START or self.state == WAIT_TAIL:
            if cond == MATCHED_SWITCH:
                # Emit the match here and let a clone wait for a longer one.
                new = self.clone(node.from_word)
                self.final += node.to_word
                self.len += 1
                self.state = END
                self.pool = UEMPTY
            elif cond == UNMATCHED_SWITCH or cond == CONNECTOR:
                if self.state == START:
                    new = self.clone(node.from_word)
                    self.final += node.to_word
                    self.len += 1
                    self.state = END
                else:
                    if node.is_follow(self.pool):
                        self.state = FAIL
                    else:
                        self.pool = node.from_word
        elif self.state == END:
            # END is a new START
            self.state = START
            new = self.feed(char, map)
        elif self.state == FAIL:
            raise StatesMachineException('Translate States Machine '
                    'have error with input data %s' % node)
        return new

    def __len__(self):
        # One more than the emitted word count; Converter uses len() to rank
        # machines.
        return self.len + 1

    def __str__(self):
        return '<StatesMachine %s, pool: "%s", state: %s, final: %s>' % (
                id(self), self.pool, self.state, self.final)
    __repr__ = __str__

class Converter(object):
    """Converts text using the ConvertMap registered under a given name.

    Runs several StatesMachine instances in parallel (one per candidate
    segmentation) and commits the output of the one with the smallest len().
    """

    def __init__(self, to_encoding):
        """Look up the map registered as to_encoding (KeyError if unknown)."""
        self.to_encoding = to_encoding
        self.map = MAPS[to_encoding]
        self.start()

    def feed(self, char):
        """Feed one character to every live machine.

        Branches returned by the machines are added, failed machines are
        dropped, and when every remaining machine is in END state the best
        result is committed.

        Returns:
            The text committed so far.
        """
        branches = []
        for fsm in self.machines:
            new = fsm.feed(char, self.map)
            if new:
                branches.append(new)
        if branches:
            self.machines.extend(branches)
        self.machines = [fsm for fsm in self.machines if fsm.state != FAIL]
        if all(fsm.state == END for fsm in self.machines):
            self._clean()
        return self.get_result()

    def _clean(self):
        """Commit the machine with the smallest len() and start afresh."""
        if self.machines:
            # min() returns the first minimal element, the same machine a
            # stable ascending sort would place at index 0.
            self.final += min(self.machines, key=len).final
        self.machines = [StatesMachine()]

    def start(self):
        """Reset the converter to an empty result and a single new machine."""
        self.machines = [StatesMachine()]
        self.final = UEMPTY

    def end(self):
        """Flush: keep only machines in FAIL or END state, then commit."""
        self.machines = [fsm for fsm in self.machines
                if fsm.state == FAIL or fsm.state == END]
        self._clean()

    def convert(self, string):
        """Convert a whole string and return the converted text."""
        self.start()
        for char in string:
            self.feed(char)
        self.end()
        return self.get_result()

    def get_result(self):
        """Return the text committed so far."""
        return self.final

def registery(name, mapping):
    """Build a ConvertMap from mapping and store it in MAPS under name."""
    convert_map = ConvertMap(name, mapping)
    # Item assignment mutates the module-level dict in place.
    MAPS[name] = convert_map

# Register both conversion tables under the names that Converter looks up
# in MAPS.
registery('zh-hant', zh2Hant)
registery('zh-hans', zh2Hans)
# Drop the module-level references to the raw tables.
del zh2Hant, zh2Hans

def run():
    """Command-line entry point: convert UTF-8 text line by line.

    Options:
        -e  name of the registered conversion map (required)
        -f  input file; '-' or omitted means standard input
        -t  output file; '-' or omitted means standard output

    Input is decoded as UTF-8, converted, and written back as UTF-8 with the
    original line breaks preserved.
    """
    import sys
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-e', type='string', dest='encoding',
            help='encoding')
    parser.add_option('-f', type='string', dest='file_in',
            help='input file (- for stdin)')
    parser.add_option('-t', type='string', dest='file_out',
            help='output file')
    (options, args) = parser.parse_args()
    if not options.encoding:
        parser.error('encoding must be set')

    opened = []  # only the files opened here are closed here
    try:
        # Work on bytes throughout; on Python 3 the standard streams expose
        # their binary layer as ``.buffer``, on Python 2 they are already
        # byte streams.
        if options.file_in and options.file_in != '-':
            file_in = open(options.file_in, 'rb')
            opened.append(file_in)
        else:
            file_in = getattr(sys.stdin, 'buffer', sys.stdin)
        if options.file_out and options.file_out != '-':
            file_out = open(options.file_out, 'wb')
            opened.append(file_out)
        else:
            file_out = getattr(sys.stdout, 'buffer', sys.stdout)

        c = Converter(options.encoding)
        for line in file_in:
            text = line.decode('utf8')
            body = text.rstrip('\n')
            # Re-attach the line break that was stripped before conversion,
            # otherwise all output lines would be joined together.
            line_break = text[len(body):]
            file_out.write((c.convert(body) + line_break).encode('utf8'))
    finally:
        for handle in opened:
            handle.close()

# Run the command-line converter when this module is executed as a script.
if __name__ == '__main__':
    run()

zh_wiki.py

# -*- coding: utf-8 -*-
# copied from Wikipedia

zh2Hant = {
'呆': '獃',
"打印机": "印表機",
'帮助文件': '說明檔案',
"画": "畫",
"龙": "竜",
"板": "板",
"表": "表",
"才": "才",
"丑": "醜",
"出": "出",
"淀": "澱",
"冬": "冬",
"范": "範",
"丰": "豐",
"刮": "刮",
"后": "後",
"胡": "胡",
"回": "回",
"伙": "夥",
"姜": "薑",
"借": "借",
"克": "克",
"困": "困",
"漓": "漓",
"里": "里",
"帘": "簾",
"霉": "霉",
"面": "面",
"蔑": "蔑",
"千": "千",
"秋": "秋",
"松": "松",
"咸": "咸",
"向": "向",
"余": "餘",
"郁": "鬱",
"御": "御",
"愿": "願",
"云": "雲",
"芸": "芸",
"沄": "沄",
"致": "致",
"制": "制",
"朱": "朱",
"筑": "築",
"准": "準",
"厂": "廠",
"广": "廣",
"辟": "闢",
"别": "別",
"卜": "卜",
"沈": "沈",
"冲": "沖",
"种": "種",
"虫": "蟲",
"担": "擔",
"党": "黨",
"斗": "鬥",
"儿": "兒",
"干": "乾",
"谷": "谷",
"柜": "櫃",
"合": "合",
"划": "劃",
"坏": "壞",
"几": "幾",
"系": "系",
"家": "家",
"价": "價",
"据": "據",
"卷": "捲",
"适": "適",
"蜡": "蠟",
"腊": "臘",
"了": "了",
"累": "累",
"么": "麽",
"蒙": "蒙",
"万": "萬",
"宁": "寧",
"朴": "樸",
"苹": "蘋",
"仆": "僕",
"曲": "曲",
"确": "確",
"舍": "舍",
"胜": "勝",
"术": "術",
"台": "台",
"体": "體",
"涂": "塗",
"叶": "葉",
"吁": "吁",
"旋": "旋",
"佣": "傭",
"与": "與",
"折": "折",
"征": "徵",
"症": "症",
"恶": "惡",
"发": "發",
"复": "復",
"汇": "匯",
"获": "獲",
"饥": "飢",
"尽": "盡",
"历": "歷",
"卤": "滷",
"弥": "彌",
"签": "簽",
"纤": "纖",
"苏": "蘇",
"坛": "壇",
"团": "團",
"须": "須",
"脏": "臟",
"只": "只",
"钟": "鐘",
"药": "藥",
"同": "同",
"志": "志",
"杯": "杯",
"岳": "岳",
"布": "布",
"当": "當",
"吊": "弔",
"仇": "仇",
"蕴": "蘊",
"线": "線",
"为": "為",
"产": "產",
"众": "眾",
"伪": "偽",
"凫": "鳧",
"厕": "廁",
"启": "啟",
"墙": "牆",
"壳": "殼",
"奖": "獎",
"妫": "媯",
"并": "並",
"录": "錄",
"悫": "愨",
"极": "極",
"沩": "溈",
"瘘": "瘺",
"硷": "鹼",
"竖": "豎",
"绝": "絕",
"绣": "繡",
"绦": "絛",
"绱": "緔",
"绷": "綳",
"绿": "綠",
"缰": "韁",
"苧": "苎",
"莼": "蒓",
"说": "說",
"谣": "謠",
"谫": "譾",
"赃": "贓",
"赍": "齎",
"赝": "贗",
"酝": "醞",
"采": "採",
"钩": "鉤",
"钵": "缽",
"锈": "銹",
"锐": "銳",
"锨": "杴",
"镌": "鐫",
"镢": "钁",
"阅": "閱",
"颓": "頹",
"颜": "顏",
"骂": "罵",
"鲇": "鯰",
"鲞": "鯗",
"鳄": "鱷",
"鸡": "雞",
"鹚": "鶿",
"荡": "盪",
"锤": "錘",
"㟆": "㠏",
"㛟": "

python将简体字转化为繁体字相关推荐

  1. C#把数据库表里简体字转化为繁体字

    前言: 有一个需求改动,需要把一张表里的所有数据从简体中文改为繁体中文.表的数据也不多,大概500左右.大概有几种思路: 1.手动翻译,然后用Navicat等工具直接文本替换(相同文本多还好,少还是很 ...

  2. 获取两个时间之间的间隔_花了两天,终于把 Python 的时间转化给整明白了(超多图解)...

    来源:Python编程时光(未经授权不得私自转载) Python 的时间转化,我相信是很多开发者的痛.学得慢,忘得快,一到要用就去百度,效率太低.建议收藏这些文章(但也别忘了点赞呐,求你了),需要的时 ...

  3. python 语料_用python将语料转化为可计算的形式

    1.[用python将语料转化为可计算的形式代码]语料向量化 #-*- coding:utf-8 -*- #语料向量化表示方法 #以下代码参考 Natural Language Processing ...

  4. Python将阿拉伯数字转化为中文大写

    利用Python将阿拉伯数字转化为中文大写,其实最麻烦的地方就是中间空多个0的问题,这种情况下,采用拆分法则,将一个大数字,先拆分成整数部分和小数部分,再对整数部分按照仟.万.亿.兆分位拆分为四个字符 ...

  5. python实现矩阵转化图像

    python实现矩阵转化图像 280*280 格式 黑白表达: # coding=gbk # 实现读取一个TXT文件,将文件中的数据存放在一个列表中, # 再将列表逐渐转换为数组和矩阵 # 最后利用矩 ...

  6. python实现图片转化技术(UI:TK Core:PIL)

    有很多时候,我们都需要转化图片格式,所以我们来使用python制作一个转化工具 [懒懒的朋友可以直接划到文章末尾复制全部代码,别忘记点赞,收藏,关注!] 效果: 可以看到,支持转化为四种格式,png, ...

  7. pythonppt_用Python实现ppt转化图片(附带长图合并功能)

    用Python实现ppt转化图片(附带长图合并功能) 前言 笔者前一阵在学习数据相关的东西,从初学开始,一直在参加社区中的图表小挑战,此项活动是社区出题人给出一定官方数据,参加小挑战的人员可以对数据进 ...

  8. Python 流程图 — 一键转化代码为流程图

    Graphviz是一个可以对图进行自动布局的绘图工具,由贝尔实验室开源.我们在上次 Python 快速绘制画出漂亮的系统架构图 提到的diagrams,其内部的编排逻辑就用到了这个开源工具包. 而今天 ...

  9. python将汉字转化为拼音

    python将汉字转化为拼音 安装 依赖python包:pypinyin 直接pip install pypinyin即可 常用方法及场景案例 from pypinyin import pinyin, ...

最新文章

  1. asp.net webform 复制窗体代码_逆向分析流氓软件自我复制以及防御思路
  2. 【MediaPipe】(2) AI视觉,人体姿态关键点实时跟踪,附python完整代码
  3. php orm url,PHP ORM使用之
  4. 常用的时间字符串转换
  5. Java多线程之Callable、Future和FutureTask
  6. Centos7.4 安装elasticsearch6.1.3集群部署
  7. [Nodejs原理] 核心库Libuv入门(Hello World篇)
  8. linux 装windows软件,常用软件的安装(windows/linux)
  9. 记一次和摄像头的摩擦经历
  10. 的计时器设置_如何选择最适合自己的计时器?
  11. 2020年最前沿的 8 本AI技术图书—文末留言赠8本
  12. 查看Oracle数据库的用户名和密码
  13. Flutter之播放视频
  14. mysql运动会信息管理系统_运动会管理信息系统
  15. i2c-tools 使用集锦
  16. 智慧校园安防平台应用解决方案
  17. Norton Ghost V12
  18. DeepFaceLab:A simple,flexible and extensible face swapping framework(2020)
  19. RGCF: Refined Graph Convolution Collaborative Filering withConcise and Expressive Embedding
  20. 用 Neo4j 快速构建明星关系图谱,你一定感兴趣

热门文章

  1. tsv文件怎么用Excel打开具体打开方法教程
  2. poc测试环境准备_华为桌面云 【环境搭建类】 Poc测试经验总结-环境搭建
  3. 【参赛作品47】openGauss数据库源码学习-指标采集、预测与异常检测
  4. 网络安全进阶篇(十一章-6)APP渗透测试篇(中)
  5. C#连接远程数据库-连接云服务器数据库
  6. C++ JSON处理库 CJsonObject 的使用
  7. 手把手教你制作一个PWA应用教程
  8. SpringBoot网上商城(源代码+数据库)014
  9. 基于语音识别的提醒闹钟项目总结
  10. 电大计算机专业英语形成性作业,电大: 计算机专业英语形成性考核册答案.doc...