1  爬取拼音和笔顺

拼音爬自 https://zidian.900cha.com/ 。数据文件汉字拼音带音标和笔顺共20842字(“壭亪寽兯嚸”这五个字未收录)

笔顺爬自 http://bs.kaishicha.com/ 。数据文件汉字笔顺共20842字(“壭亪寽兯嚸”这五个字未收录)

/// <summary>
/// One record of the crawled data file: a character together with its radical,
/// stroke information and pinyin readings, plus its binary (de)serialization.
/// </summary>
public class CharUnit
{
    /// <summary>
    /// The Chinese character.
    /// </summary>
    public char Char;

    /// <summary>
    /// The radical of the character.
    /// </summary>
    public char Radical;

    /// <summary>
    /// Total number of strokes.
    /// </summary>
    public byte StrokeCount;

    /// <summary>
    /// Stroke order.
    /// </summary>
    public string Strokes;

    /// <summary>
    /// Number of pinyin readings stored in <see cref="PinyinList"/>.
    /// </summary>
    public byte PinyinCount;

    /// <summary>
    /// The pinyin readings.
    /// </summary>
    public string[] PinyinList;

    /// <summary>
    /// Reads one record from <paramref name="binaryReader"/> in the field order written by
    /// <see cref="Serialize"/>: Char, Radical, StrokeCount, Strokes, PinyinCount, then
    /// PinyinCount strings.
    /// </summary>
    /// <param name="binaryReader">Reader positioned at the start of a record.</param>
    /// <returns>The record that was read.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="binaryReader"/> is null.</exception>
    public static CharUnit Deserialize(BinaryReader binaryReader)
    {
        if (binaryReader == null)
        {
            throw new System.ArgumentNullException(nameof(binaryReader));
        }

        var charUnit = new CharUnit();
        charUnit.Char = binaryReader.ReadChar();
        charUnit.Radical = binaryReader.ReadChar();
        charUnit.StrokeCount = binaryReader.ReadByte();
        charUnit.Strokes = binaryReader.ReadString();
        charUnit.PinyinCount = binaryReader.ReadByte();
        charUnit.PinyinList = new string[charUnit.PinyinCount];
        for (int i = 0; i < charUnit.PinyinCount; i++)
        {
            charUnit.PinyinList[i] = binaryReader.ReadString();
        }

        return charUnit;
    }

    /// <summary>
    /// Writes this record to <paramref name="binaryWriter"/> in the layout expected by
    /// <see cref="Deserialize"/>.
    /// </summary>
    /// <param name="binaryWriter">Destination writer.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="binaryWriter"/> is null.</exception>
    /// <exception cref="System.InvalidOperationException">
    /// <see cref="Strokes"/> is null, or <see cref="PinyinList"/> does not hold
    /// <see cref="PinyinCount"/> non-null entries.
    /// </exception>
    public void Serialize(BinaryWriter binaryWriter)
    {
        if (binaryWriter == null)
        {
            throw new System.ArgumentNullException(nameof(binaryWriter));
        }

        // Validate everything before the first write so that an inconsistent record
        // never leaves a half-written entry in the output stream.
        if (this.Strokes == null)
        {
            throw new System.InvalidOperationException("Strokes must not be null.");
        }

        if (this.PinyinCount > 0 && (this.PinyinList == null || this.PinyinList.Length < this.PinyinCount))
        {
            throw new System.InvalidOperationException("PinyinList holds fewer entries than PinyinCount.");
        }

        for (int i = 0; i < this.PinyinCount; i++)
        {
            if (this.PinyinList[i] == null)
            {
                throw new System.InvalidOperationException("PinyinList must not contain null entries.");
            }
        }

        binaryWriter.Write(this.Char);
        binaryWriter.Write(this.Radical);
        binaryWriter.Write(this.StrokeCount);
        binaryWriter.Write(this.Strokes);
        binaryWriter.Write(this.PinyinCount);
        for (int i = 0; i < this.PinyinCount; i++)
        {
            binaryWriter.Write(this.PinyinList[i]);
        }
    }
}

2  vs2019新建.net core console项目,NuGet导入

Microsoft.EntityFrameworkCore              //ef core
Microsoft.EntityFrameworkCore.Design       //在nuget控制台中管理数据迁移
Microsoft.EntityFrameworkCore.Tools        //在nuget控制台中管理数据迁移
Microsoft.EntityFrameworkCore.Sqlite       //sqlite
Microsoft.EntityFrameworkCore.Sqlite.Core  //sqlite
HtmlAgilityPack                            //xpath

3  共五个表:汉字、部首、笔顺、拼音、拼音汉字many-to-many辅助表。部首和汉字是one-to-many,笔顺和汉字是one-to-one。

/// <summary>
/// A Chinese character. Belongs to one <see cref="Radical"/>, has one
/// <see cref="CharStroke"/> and any number of <see cref="PinYin"/> readings.
/// </summary>
public class ChineseChar
{
    /// <summary>
    /// Wires <see cref="PinYins"/> as a view over the private join rows so callers
    /// can work with <see cref="PinYin"/> objects directly.
    /// </summary>
    public ChineseChar()
        => PinYins = new JoinCollectionFacade<PinYin, PinYinChar>(
            PinYinChars,
            pyc => pyc.PinYin,
            py => new PinYinChar { PinYin = py, ChineseChar = this });

    public int ChineseCharId { get; set; }

    [Column(TypeName = "NCHAR(1)"), Required]
    public char Char { get; set; }

    public int RadicalId { get; set; }

    public Radical Radical { get; set; }

    public CharStroke CharStroke { get; set; }

    // Join rows; referenced by name ("PinYinChars") in OnModelCreating and in string-based Include calls.
    private ICollection<PinYinChar> PinYinChars { get; } = new List<PinYinChar>();

    /// <summary>
    /// The pinyin readings of this character, backed by the join rows.
    /// </summary>
    [NotMapped]
    public ICollection<PinYin> PinYins { get; }
}

/// <summary>
/// A pinyin syllable shared by any number of <see cref="ChineseChar"/> entries.
/// </summary>
public class PinYin
{
    /// <summary>
    /// Wires <see cref="ChineseChars"/> as a view over the private join rows.
    /// </summary>
    public PinYin()
        => ChineseChars = new JoinCollectionFacade<ChineseChar, PinYinChar>(
            PinYinChars,
            pyc => pyc.ChineseChar,
            cchar => new PinYinChar { PinYin = this, ChineseChar = cchar });

    public int PinYinId { get; set; }

    [Column(TypeName = "NVARCHAR(6)"), Required]
    public string Pinyin { get; set; }

    // Join rows; referenced by name ("PinYinChars") in OnModelCreating and in string-based Include calls.
    private ICollection<PinYinChar> PinYinChars { get; } = new List<PinYinChar>();

    /// <summary>
    /// The characters that have this reading, backed by the join rows.
    /// </summary>
    [NotMapped]
    public ICollection<ChineseChar> ChineseChars { get; }
}

/// <summary>
/// Join entity of the many-to-many relation between <see cref="PinYin"/> and <see cref="ChineseChar"/>.
/// </summary>
public class PinYinChar
{
    public int PinYinId { get; set; }

    public PinYin PinYin { get; set; }

    public int ChineseCharId { get; set; }

    public ChineseChar ChineseChar { get; set; }
}

/// <summary>
/// A radical; one radical has many characters.
/// </summary>
public class Radical
{
    public int RadicalId { get; set; }

    [Column(TypeName = "NCHAR(1)"), Required]
    public char RadicalOfChar { get; set; }

    public ICollection<ChineseChar> ChineseChars { get; } = new List<ChineseChar>();
}

/// <summary>
/// Stroke count and stroke order of exactly one <see cref="ChineseChar"/>;
/// its key is also the foreign key to that character.
/// </summary>
public class CharStroke
{
    [Key]
    public int ChineseCharId { get; set; }

    [Column(TypeName = "TINYINT"), Required]
    public int StrokeCount { get; set; }

    [Column(TypeName = "NVARCHAR(120)"), Required]
    public string Stroke { get; set; }

    public ChineseChar ChineseChar { get; set; }
}

/// <summary>
/// EF Core context over the SQLite file PinyinConvert.db located in the application base directory.
/// </summary>
public class PinyinConvertContext : DbContext
{
    public DbSet<ChineseChar> ChineseChar { get; set; }

    public DbSet<Radical> Radical { get; set; }

    public DbSet<CharStroke> CharStroke { get; set; }

    public DbSet<PinYin> PinYin { get; set; }

    /// <summary>
    /// Points the context at PinyinConvert.db next to the application binaries.
    /// </summary>
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        // Path.Combine avoids the doubled separator produced by concatenating
        // BaseDirectory (which normally already ends with one) and "/PinyinConvert.db".
        var databaseFile = System.IO.Path.Combine(AppContext.BaseDirectory, "PinyinConvert.db");
        optionsBuilder.UseSqlite($"Data Source={databaseFile}");
    }

    /// <summary>
    /// Configures the three relationship kinds used by the model.
    /// </summary>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        // one-to-one
        modelBuilder.Entity<ChineseChar>()
            .HasOne(cc => cc.CharStroke)
            .WithOne(cs => cs.ChineseChar)
            .HasForeignKey<CharStroke>(cc => cc.ChineseCharId);

        // one-to-many
        modelBuilder.Entity<ChineseChar>()
            .HasOne(cc => cc.Radical)
            .WithMany(r => r.ChineseChars)
            .HasForeignKey(cc => cc.RadicalId);

        // many-to-many: composite key on the join entity, private collections addressed by name
        modelBuilder.Entity<PinYinChar>()
            .HasKey(pyc => new { pyc.PinYinId, pyc.ChineseCharId });
        modelBuilder.Entity<PinYinChar>()
            .HasOne(pyc => pyc.PinYin)
            .WithMany("PinYinChars");
        modelBuilder.Entity<PinYinChar>()
            .HasOne(pyc => pyc.ChineseChar)
            .WithMany("PinYinChars");
    }
}

many-to-many参考 https://blog.oneunicorn.com/2017/09/25/many-to-many-relationships-in-ef-core-2-0-part-3-hiding-as-icollection/ 做了改动,隐藏了属性PinYinChars,增加了类JoinCollectionFacade

/// <summary>
/// Presents a collection of join objects (<typeparamref name="TJoin"/>) as a collection of the
/// objects on the other side of the join (<typeparamref name="T"/>).
/// </summary>
/// <typeparam name="T">Element type exposed to callers.</typeparam>
/// <typeparam name="TJoin">Element type actually stored in the wrapped collection.</typeparam>
public class JoinCollectionFacade<T, TJoin> : ICollection<T>
{
    private readonly ICollection<TJoin> _collection;
    private readonly Func<TJoin, T> _selector;
    private readonly Func<T, TJoin> _creator;

    /// <summary>
    /// Creates a facade over <paramref name="collection"/>.
    /// </summary>
    /// <param name="collection">The wrapped collection of join objects; all mutations go to it.</param>
    /// <param name="selector">Maps a stored join object to the exposed element.</param>
    /// <param name="creator">Builds the join object to store when an element is added.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public JoinCollectionFacade(
        ICollection<TJoin> collection,
        Func<TJoin, T> selector,
        Func<T, TJoin> creator)
    {
        _collection = collection ?? throw new ArgumentNullException(nameof(collection));
        _selector = selector ?? throw new ArgumentNullException(nameof(selector));
        _creator = creator ?? throw new ArgumentNullException(nameof(creator));
    }

    /// <summary>Enumerates the wrapped collection through the selector.</summary>
    public IEnumerator<T> GetEnumerator()
        => _collection.Select(e => _selector(e)).GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator()
        => GetEnumerator();

    /// <summary>Stores the join object created for <paramref name="item"/>.</summary>
    public void Add(T item)
        => _collection.Add(_creator(item));

    /// <summary>Removes every join object from the wrapped collection.</summary>
    public void Clear()
        => _collection.Clear();

    /// <summary>Returns true when some stored join object maps to <paramref name="item"/>.</summary>
    public bool Contains(T item)
        => _collection.Any(e => Equals(_selector(e), item));

    /// <summary>Copies the exposed elements into <paramref name="array"/> starting at <paramref name="arrayIndex"/>.</summary>
    public void CopyTo(T[] array, int arrayIndex)
        => this.ToList().CopyTo(array, arrayIndex);

    /// <summary>
    /// Removes the first join object that maps to <paramref name="item"/>.
    /// </summary>
    /// <returns>True when a matching join object was found and removed; otherwise false.</returns>
    public bool Remove(T item)
    {
        // Search explicitly instead of FirstOrDefault(): when nothing matches, FirstOrDefault
        // yields default(TJoin), and passing that on would remove an unrelated default-valued
        // entry (e.g. 0 or null) and report success.
        foreach (var join in _collection)
        {
            if (Equals(_selector(join), item))
            {
                // Returning immediately, so the enumerator is not advanced after the mutation.
                return _collection.Remove(join);
            }
        }

        return false;
    }

    /// <summary>Number of join objects in the wrapped collection.</summary>
    public int Count
        => _collection.Count;

    /// <summary>Mirrors the read-only state of the wrapped collection.</summary>
    public bool IsReadOnly
        => _collection.IsReadOnly;
}

4  Migration

Add-Migration Init
Update-Database

5  初始化数据库数据

/// <summary>
/// Fills the database from the crawled data file: one <see cref="ChineseChar"/> with its
/// <see cref="CharStroke"/> per record, linked to a shared <see cref="Radical"/> and to shared
/// <see cref="PinYin"/> rows. Everything is saved inside a single transaction.
/// </summary>
public static void InitDataBase()
{
    var data = GetCharUnits();

    // The two "m" readings of '呣' were crawled without tone marks; patch them by hand.
    // Guarded so that a data file without this character (or with fewer readings) does not crash.
    var t = data.FirstOrDefault(item => item.Char == '呣');
    if (t != null && t.PinyinList != null && t.PinyinList.Length >= 3)
    {
        t.PinyinList[1] = "ḿ";
        t.PinyinList[2] = "m̀";
    }

    // One entity per distinct radical / pinyin. The lists keep first-seen order for insertion;
    // the dictionaries replace the per-record linear scans over those lists.
    var radicals = data
        .Select(item => item.Radical)
        .Distinct()
        .Select(radical => new Radical { RadicalOfChar = radical })
        .ToList();
    var pinyins = data
        .SelectMany(item => item.PinyinList)
        .Distinct()
        .Select(pinyin => new PinYin { Pinyin = pinyin })
        .ToList();
    var radicalByChar = radicals.ToDictionary(r => r.RadicalOfChar);
    var pinyinByText = pinyins.ToDictionary(p => p.Pinyin);

    using (var context = new PinyinConvertContext())
    {
        context.Database.EnsureCreated();
        using (var transaction = context.Database.BeginTransaction())
        {
            foreach (var item in data)
            {
                var cchar = new ChineseChar { Char = item.Char };

                // one-to-one
                var stroke = new CharStroke { StrokeCount = item.StrokeCount, Stroke = item.Strokes };
                cchar.CharStroke = stroke;
                context.CharStroke.Add(stroke);

                // one-to-many
                radicalByChar[item.Radical].ChineseChars.Add(cchar);

                // many-to-many
                foreach (var str in item.PinyinList)
                {
                    cchar.PinYins.Add(pinyinByText[str]);
                }

                context.ChineseChar.Add(cchar);
            }

            context.AddRange(radicals);
            context.SaveChanges();
            transaction.Commit();
        }
    }
}

/// <summary>
/// Reads every <see cref="CharUnit"/> record from "charunits.cu" in the user's documents folder.
/// </summary>
/// <returns>The records in file order.</returns>
private static IEnumerable<CharUnit> GetCharUnits()
{
    var result = new List<CharUnit>();
    var file = Path.Combine(SpecialDirectories.MyDocuments, "charunits.cu");
    using (var input = File.OpenRead(file))
    using (var reader = new BinaryReader(input))
    {
        try
        {
            // Stop at the end of the file instead of waiting for an exception to signal it.
            while (input.Position < input.Length)
            {
                result.Add(CharUnit.Deserialize(reader));
            }
        }
        catch (EndOfStreamException)
        {
            // A truncated trailing record: keep the complete records read so far.
            // Any other failure (I/O error, corrupt data) is no longer silently swallowed.
        }
    }

    return result;
}

charunits.cu文件放在文档库根目录下。

6  汉字转拼音

/// <summary>
/// Converts every character of <paramref name="str"/> to pinyin and joins the results with
/// single spaces. A character with more than one reading is rendered as "[a b]"; a character
/// with no reading in the database contributes an empty entry.
/// </summary>
/// <param name="str">The text to convert.</param>
/// <param name="context">The database context to query.</param>
/// <returns>The space-separated pinyin of all characters.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
public static string ChineseCharToPinyin(string str, PinyinConvertContext context)
{
    if (str == null)
    {
        throw new System.ArgumentNullException(nameof(str));
    }

    return string.Join(" ", str.Select(ch =>
    {
        // Materialize once so the readings are neither re-queried nor counted twice.
        var readings = ChineseCharToPinyin(ch, context).ToList();
        var joined = string.Join(" ", readings);
        return readings.Count <= 1 ? joined : "[" + joined + "]";
    }));
}

/// <summary>
/// Returns all pinyin readings of the character <paramref name="ch"/>.
/// </summary>
/// <param name="ch">The character to look up.</param>
/// <param name="context">The database context to query.</param>
/// <returns>The readings; empty when the character is not in the database.</returns>
public static IEnumerable<string> ChineseCharToPinyin(char ch, PinyinConvertContext context)
{
    // Filter in the query: only the matching row(s) and their join rows are loaded,
    // instead of the whole table with all includes on every call.
    var matches = context.ChineseChar
        .AsNoTracking()
        .Include("PinYinChars.PinYin")
        .Where(cchar => cchar.Char == ch)
        .ToList();

    // PinYins is an unmapped in-memory facade, so the flattening has to happen client-side.
    return matches
        .SelectMany(cchar => cchar.PinYins)
        .Select(pinyin => pinyin.Pinyin)
        .ToList();
}

/// <summary>
/// Checks the readings of a single character and the bracketed rendering of a sentence.
/// </summary>
[TestMethod]
public Task ChineseCharToPinyinTest()
{
    using (var context = new PinyinConvertContext())
    {
        context.Database.EnsureCreated();

        var test = ChineseCharToPinyin('仛', context).ToArray();
        var expected = new string[] { "tuō", "chà", "duó" };
        CollectionAssert.AreEquivalent(expected, test);

        var testpinyin = ChineseCharToPinyin("可惜不是你", context);
        var expectedpinyin = @"[kě kè] xī [bù fǒu] shì nǐ";
        Assert.AreEqual(expectedpinyin, testpinyin);
    }

    // Nothing is awaited; the Task return type is kept only so the signature is unchanged.
    return Task.CompletedTask;
}

7  拼音转汉字

/// <summary>
/// Returns all characters that have the reading <paramref name="pinyin"/>.
/// </summary>
/// <param name="pinyin">The pinyin syllable to look up, compared by exact equality.</param>
/// <param name="context">The database context to query.</param>
/// <returns>The matching characters; empty when the reading is not in the database.</returns>
public static IEnumerable<char> PinyinToChineseChar(string pinyin, PinyinConvertContext context)
{
    // Filter in the query: only the matching pinyin row(s) and their join rows are loaded,
    // instead of the whole table with all includes.
    var matches = context.PinYin
        .AsNoTracking()
        .Include("PinYinChars.ChineseChar")
        .Where(item => item.Pinyin == pinyin)
        .ToList();

    // ChineseChars is an unmapped in-memory facade, so the flattening has to happen client-side.
    return matches
        .SelectMany(item => item.ChineseChars)
        .Select(cchar => cchar.Char)
        .ToList();
}

/// <summary>
/// Checks the characters returned for the reading "tā".
/// </summary>
[TestMethod]
public Task PinyinToChineseCharTest()
{
    using (var context = new PinyinConvertContext())
    {
        context.Database.EnsureCreated();

        var test = PinyinToChineseChar("tā", context).ToArray();
        var expected = new char[] { '她','他','它','牠','祂','铊','趿','鉈','榙','塌','溻','褟' };
        CollectionAssert.AreEquivalent(expected, test);
    }

    // Nothing is awaited; the Task return type is kept only so the signature is unchanged.
    return Task.CompletedTask;
}

8  汉字部首

/// <summary>
/// Returns the radical of the character <paramref name="ch"/>.
/// </summary>
/// <param name="ch">The character to look up.</param>
/// <param name="context">The database context to query.</param>
/// <returns>The radical, or the default char value ('\0') when the character is not in the database.</returns>
public static char GetRadical(char ch, PinyinConvertContext context)
{
    // Filter and project in the query so only the radical of the matching row is fetched,
    // instead of loading every character together with its radical.
    return context.ChineseChar
        .AsNoTracking()
        .Where(cchar => cchar.Char == ch)
        .Select(cchar => cchar.Radical.RadicalOfChar)
        .FirstOrDefault();
}

/// <summary>
/// Checks the radical returned for '烎'.
/// </summary>
[TestMethod]
public Task GetRadicalTest()
{
    using (var context = new PinyinConvertContext())
    {
        context.Database.EnsureCreated();

        var test = GetRadical('烎', context);
        var expected = '火';
        Assert.AreEqual(expected, test);
    }

    // Nothing is awaited; the Task return type is kept only so the signature is unchanged.
    return Task.CompletedTask;
}

9  汉字笔顺

/// <summary>
/// Returns the stroke order of the character <paramref name="ch"/>.
/// </summary>
/// <param name="ch">The character to look up.</param>
/// <param name="context">The database context to query.</param>
/// <returns>The stored stroke-order string, or null when the character is not in the database.</returns>
public static string GetStrokes(char ch, PinyinConvertContext context)
{
    // Filter and project in the query so only the stroke string of the matching row is fetched,
    // instead of loading every character together with its stroke record.
    return context.ChineseChar
        .AsNoTracking()
        .Where(cchar => cchar.Char == ch)
        .Select(cchar => cchar.CharStroke.Stroke)
        .FirstOrDefault();
}

/// <summary>
/// Checks the stroke order returned for '嘦'.
/// </summary>
[TestMethod]
public Task GetStrokesTest()
{
    using (var context = new PinyinConvertContext())
    {
        context.Database.EnsureCreated();

        var test = GetStrokes('嘦', context);
        var expected = "竖\t横折\t横\t撇\t点\t横\t竖\t横折\t竖\t竖\t横\t撇点\t撇\t横";
        Assert.AreEqual(expected, test);
    }

    // Nothing is awaited; the Task return type is kept only so the signature is unchanged.
    return Task.CompletedTask;
}

【笔记】汉字拼音互转(带音标和笔顺)共20842字相关推荐

  1. java取汉字拼音首字母含多音字及不常见的字

    package com.anxin.ssk.common;import java.io.UnsupportedEncodingException; import java.util.HashMap; ...

  2. Android带数字拼音与带音标拼音互转工具类

    - 前言 刚进入上一家公司时,编写过一个学汉语项目,这个项目对我的锻炼还是也挺大的,毕竟一个刚毕业的大学生,从来没有接手过公司项目,而所从事的公司是一家偏硬件的语音公司,手机端编程的人员在我之前几个月 ...

  3. JS版汉字与拼音互转终极方案,附简单的JS拼音输入法

    原文:http://www.cnblogs.com/liuxianan/p/pinyinjs.html 前言 网上关于JS实现汉字和拼音互转的文章很多,但是比较杂乱,都是互相抄来抄去,而且有的不支持多 ...

  4. 【干货】JS版汉字与拼音互转终极方案,附简单的JS拼音输入法

    转自:小茗同学的博客:小茗同学博客 网上关于JS实现汉字和拼音互转的文章很多,但是比较杂乱,都是互相抄来抄去,而且有的不支持多音字,有的不支持声调,有的字典文件太大,还比如有时候我仅仅是需要获取汉字拼 ...

  5. unity | 输入汉字自动转换成带拼音(带声调)

    就只需要下载一个带拼音的字体,就可以解决,字体链接放在下面 [汉字拼音体]一款文字头上自带拼音和声调的字体-100font.com 我个傻子...试了好几次用代码转....

  6. 使用win10自带的输入法打出带音标的拼音

    第一次写博客,不想这么麻烦的注意格式,就随便乱写了 由于某些需要,我想打出带音标的拼音,但是又不想特别的去下一个搜狗输入法,所以找到了下面这个方法: ctrl+shift+B 直接调出下面这个面板: ...

  7. [C#][转载]C# 使用微软的Visual Studio International Pack 类库提取汉字拼音首字母

    昨天经过网友提醒,提取汉字拼音的方法可以使用微软的一个类库 Visual Studio International Pack ,今天试了一试,确实好用!下面分享下使用方法: 首先下载Visual St ...

  8. 【原】在vc中实现获取汉字拼音的首字母 - lixiaosan的专栏 - CSDNBlog

    导读: Author: lixiaosan Date: 05/26/2006 /************************************************************ ...

  9. java获取汉字拼音首字母A

    public class GetChinessFirstSpell{ /// <summary> /// 汉字拼音首字母列表 本列表包含了20901个汉字,用于配合 GetChineseS ...

最新文章

  1. vue 之 nextTick 与$nextTick
  2. 编译osdrv的uboot 内核 文件系统(Hi3516a)
  3. acme.sh及https证书实践
  4. 使用协同过滤进行众包服务的工人工作自动化推荐
  5. java虚拟机类加载机制_《深入理解java虚拟机》学习笔记一/类加载机制
  6. [ js处理表单 ]:保存、提交
  7. java语言程序设计第二版课后答案吴倩_java语言程序设计课后答案 郞波 第二版 清华大学出版社...
  8. 新零售时代招商的新鲜玩法——用全网联动 促销活动来招商
  9. Ubuntu11.10 源码编译 Nginx
  10. 四大原因告诉你:云为什么比传统IT系统更安全
  11. 【行为型】Strategy模式
  12. vscode使用:tab键插入空格而不是tab
  13. OpenNI XnSkeletonJointOrientation 簡單分析
  14. Java io流使用相对路径读取文件
  15. 基于自适应扰动的疯狂蝴蝶算法-附代码
  16. linux中进程unit是什么意思,Unit(linux)基础命令
  17. 解决No backends or directors found in VCL program, at least one is necessary. Runn
  18. RFID固定资产盘点的解决方案
  19. 法国主要贸易海港加入区块链货运物流试点
  20. 【VUE】微商城(七)----实现分类页面功能

热门文章

  1. Latex最后的参考文献作者用et.al显示
  2. JSP零基础学习指南
  3. python粒子群算法的实现
  4. anaconda清华镜像更改
  5. vue项目 运行的时候报To install it, you can run: npm install --save ..\common\css\common.scss
  6. IMS QTI 实践指南 | 04 自适应试题 Adaptive Items
  7. 环保无线视频监控系统 环保在线监控数采仪
  8. iic jy901 单片机_用STM32的IIC引脚去读JY901陀螺仪和磁力计的数据
  9. 利用爬虫获取猫眼电影热门前100数据
  10. 全球与中国化学机械抛光研磨材料行业市场深度分析及发展战略咨询报告2022-2028年