我几乎没接触过 C#，不过它跟 Java 很相似，花了一天时间，将原作者的代码改成了 Java 版。

Splitter.java文件如下

import java.util.regex.Pattern;/*** Created by ajtdnyy on 13-9-3.*/
/**
 * Describes one way of cutting an address string into pieces.
 *
 * <p>A splitter pairs the regular expression that locates cut points with
 * the ordered list of candidate patterns used to classify the text that
 * precedes each cut point.
 */
public class Splitter {
    /** Expression whose matches mark cut points in the source text. */
    Pattern pattern;

    /**
     * Candidate patterns for classifying a cut-off piece; stays {@code null}
     * when the single-argument constructor is used.
     */
    Pattern[] patterns;

    /**
     * When {@code true} (the default) the parser stops scanning after the
     * first match of {@link #pattern}; when {@code false} every match is
     * recorded.
     */
    boolean flag;

    /**
     * Creates a splitter with no classification patterns and the default
     * flag value ({@code true}).
     *
     * @param pattern expression locating the cut points
     */
    public Splitter(Pattern pattern) {
        this(pattern, null, true);
    }

    /**
     * Creates a splitter with the default flag value ({@code true}).
     *
     * @param pattern  expression locating the cut points
     * @param patterns candidate patterns for classifying each piece
     */
    public Splitter(Pattern pattern, Pattern[] patterns) {
        this(pattern, patterns, true);
    }

    /**
     * Creates a fully specified splitter.
     *
     * @param pattern  expression locating the cut points
     * @param patterns candidate patterns for classifying each piece
     * @param flag     {@code true} to use only the first match of
     *                 {@code pattern}, {@code false} to use all matches
     */
    public Splitter(Pattern pattern, Pattern[] patterns, boolean flag) {
        this.pattern = pattern;
        this.patterns = patterns;
        this.flag = flag;
    }
}

Segment.java类如下

import java.util.regex.Pattern;/*** Created by ajtdnyy on 13-9-3.*/
/**
 * A piece of address text together with the pattern that recognised it.
 */
public class Segment {
    /** The recognised text. */
    String value;

    /** The pattern that {@link #value} was matched against. */
    Pattern pattern;

    /**
     * Creates a segment.
     *
     * @param value   the recognised text
     * @param pattern the pattern that matched {@code value}
     */
    public Segment(String value, Pattern pattern) {
        this.pattern = pattern;
        this.value = value;
    }
}

ChineseAddress.java类如下

import java.util.List;/*** Created by ajtdnyy on 13-9-3.*/
public class ChineseAddress {public String source;public String nation;public String province;public String city;public String county;public String district;public String street;public List<String> roads;public String number;public String plaza;public String ip;public String town;public String village;public String zone;public String underground;public List<String> notes;public List<String> noises;private static final String SEPARATOR = System.getProperty("line.separator");public String toString() {String s = "src: " + source + SEPARATOR;if (nation != null) {s = s + "nat: " + nation + SEPARATOR;}if (province != null) {s = s + "pro: " + province + SEPARATOR;}if (city != null) {s = s + "cit: " + city + SEPARATOR;}if (county != null) {s = s + "cou: " + county + SEPARATOR;}if (district != null) {s = s + "dis: " + district + SEPARATOR;}if (street != null) {s = s + "str: " + street + SEPARATOR;}if (number != null) {s = s + "num: " + number + SEPARATOR;}if (plaza != null) {s = s + "pla: " + plaza + SEPARATOR;}if (ip != null) {s = s + "idp: " + ip + SEPARATOR;}if (town != null) {s = s + "twn: " + town + SEPARATOR;}if (village != null) {s = s + "vil: " + village + SEPARATOR;}if (zone != null) {s = s + "zon: " + zone + SEPARATOR;}if (underground != null) {s = s + "udg: " + underground + SEPARATOR;}if (roads != null) {s = s + "rod: ";for (int i = 0; i < roads.size(); i++) {String r = roads.get(i);if (r == roads.get(0)) {s = s + r;} else {s = s + " / " + r;}}s = s + SEPARATOR;}if (notes != null) {s = s + "not: ";for (int i = 0; i < notes.size(); i++) {String n = notes.get(i);if (n == roads.get(0)) {s = s + n;} else {s = s + " / " + n;}}s = s + SEPARATOR;}if (noises != null) {s = s + "noi: ";for (int i = 0; i < noises.size(); i++) {s = s + noises.get(i) + " / ";}s = s + SEPARATOR;}return s;}
}

ChineseAddressParser.java类如下

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/*** Created by ajtdnyy on 13-9-3.*/
/**
 * Regex-driven parser that cuts a Chinese address into pieces and classifies
 * each piece (province, city, district, road, number, ...).
 *
 * <p>Parsing runs in two passes: {@code split} collects cut points from the
 * splitter expressions, then {@code recognize} classifies the text between
 * consecutive cut points using each splitter's candidate patterns.
 */
public class ChineseAddressParser {
    /** Character class matching a single CJK ideograph. */
    static final String reg = "[\u4e00-\u9fa5]";

    // ---- Expressions that locate cut points ----
    static final Pattern ms_Pattern_guo = Pattern.compile("中国");
    static final Pattern ms_Pattern_jinjiao = Pattern.compile("近郊");
    static final Pattern ms_Pattern_sheng = Pattern.compile(reg + "+?省");
    static final Pattern ms_Pattern_shi = Pattern.compile(reg + "+?市(?!场)");
    static final Pattern ms_Pattern_qu = Pattern.compile(reg + "+?区");
    static final Pattern ms_Pattern_xiang = Pattern.compile(reg + "+?乡");
    static final Pattern ms_Pattern_xian = Pattern.compile(reg + "+?县");
    static final Pattern ms_Pattern_dao = Pattern.compile(reg + "+?道");
    static final Pattern ms_Pattern_hutong = Pattern.compile(reg + "+?胡同");
    static final Pattern ms_Pattern_nongtang = Pattern.compile(reg + "+?弄堂");
    static final Pattern ms_Pattern_jie = Pattern.compile(reg + "+?街");
    static final Pattern ms_Pattern_xiangg = Pattern.compile(reg + "+?巷");
    static final Pattern ms_Pattern_lu = Pattern.compile(reg + "+?路");
    static final Pattern ms_Pattern_cun = Pattern.compile(reg + "+?村");
    static final Pattern ms_Pattern_zhen = Pattern.compile(reg + "+?镇");
    static final Pattern ms_Pattern_hao = Pattern.compile("[甲_乙_丙_0-9_-]+?号");
    static final Pattern ms_Pattern_point =
            Pattern.compile(reg + "+?(?:广场|酒店|饭店|宾馆|中心|大厦|百货|大楼|商城)");
    static final Pattern ms_Pattern_ditie =
            Pattern.compile("地铁" + reg + "+?线(?:" + reg + "+?站)?");

    // ---- Expressions that classify a cut-off piece ----
    // Segments are later looked up by Pattern identity, so patterns with the
    // same regex text (e.g. cun/village) must remain distinct objects.
    static final Pattern ms_Pattern_province =
            Pattern.compile(reg + "{2,10}?(?:省|特区|自治区|特别行政区)");
    static final Pattern ms_Pattern_city = Pattern.compile(reg + "+?(?:市|地区|自治州)");
    static final Pattern ms_Pattern_county = Pattern.compile(reg + "+?(?:乡|县)");
    static final Pattern ms_Pattern_street = Pattern.compile(reg + "+?街道");
    static final Pattern ms_Pattern_road = Pattern.compile(reg + "+?(?:胡同|弄堂|街|巷|路|道)");
    static final Pattern ms_Pattern_roadnear =
            Pattern.compile("(?<=近)" + reg + "+?(?:胡同|弄堂|街|巷|路|道)");
    static final Pattern ms_Pattern_ip = Pattern.compile(reg + "+?(?:开发区|科技区|园区)");
    static final Pattern ms_Pattern_zone = Pattern.compile(reg + "+?(?:小区|社区|新村)");
    static final Pattern ms_Pattern_village = Pattern.compile(reg + "+?村");
    static final Pattern ms_Pattern_town = Pattern.compile(reg + "+?镇");
    static final Pattern ms_Pattern_number = Pattern.compile("[甲_乙_丙_0-9_-]+号");
    static final Pattern ms_Pattern_plaza =
            Pattern.compile(reg + "+?(?:广场|酒店|饭店|宾馆|中心|大厦|百货|大楼|商城)");
    static final Pattern ms_Pattern_underground =
            Pattern.compile("地铁" + reg + "+?线(?:" + reg + "+?站)?");

    // ---- Splitters: cut-point expression + ordered classification candidates ----
    static final Splitter ms_splitter_guo =
            new Splitter(ms_Pattern_guo, new Pattern[]{ms_Pattern_guo});
    static final Splitter ms_splitter_sheng =
            new Splitter(ms_Pattern_sheng, new Pattern[]{ms_Pattern_province});
    static final Splitter ms_splitter_shi =
            new Splitter(ms_Pattern_shi, new Pattern[]{ms_Pattern_city}, false);
    static final Splitter ms_splitter_jinjiao =
            new Splitter(ms_Pattern_jinjiao, new Pattern[]{ms_Pattern_jinjiao});
    static final Splitter ms_splitter_qu = new Splitter(
            ms_Pattern_qu,
            new Pattern[]{ms_Pattern_province, ms_Pattern_city, ms_Pattern_zone, ms_Pattern_ip, ms_Pattern_qu},
            false);
    static final Splitter ms_splitter_xiang =
            new Splitter(ms_Pattern_xiang, new Pattern[]{ms_Pattern_county});
    static final Splitter ms_splitter_xian =
            new Splitter(ms_Pattern_xian, new Pattern[]{ms_Pattern_county});
    static final Splitter ms_splitter_dao = new Splitter(
            ms_Pattern_dao,
            new Pattern[]{ms_Pattern_street, ms_Pattern_roadnear, ms_Pattern_road},
            false);
    static final Splitter ms_splitter_hutong = new Splitter(
            ms_Pattern_hutong, new Pattern[]{ms_Pattern_roadnear, ms_Pattern_road}, false);
    static final Splitter ms_splitter_nongtang = new Splitter(
            ms_Pattern_nongtang, new Pattern[]{ms_Pattern_roadnear, ms_Pattern_road}, false);
    static final Splitter ms_splitter_jie = new Splitter(
            ms_Pattern_jie, new Pattern[]{ms_Pattern_roadnear, ms_Pattern_road}, false);
    static final Splitter ms_splitter_lu = new Splitter(
            ms_Pattern_lu, new Pattern[]{ms_Pattern_roadnear, ms_Pattern_road}, false);
    static final Splitter ms_splitter_xiangg = new Splitter(
            ms_Pattern_xiangg, new Pattern[]{ms_Pattern_roadnear, ms_Pattern_road}, false);
    static final Splitter ms_splitter_cun =
            new Splitter(ms_Pattern_cun, new Pattern[]{ms_Pattern_zone, ms_Pattern_village});
    static final Splitter ms_splitter_zhen =
            new Splitter(ms_Pattern_zhen, new Pattern[]{ms_Pattern_town});
    static final Splitter ms_splitter_hao =
            new Splitter(ms_Pattern_hao, new Pattern[]{ms_Pattern_number});
    static final Splitter ms_splitter_point =
            new Splitter(ms_Pattern_point, new Pattern[]{ms_Pattern_plaza});
    static final Splitter ms_splitter_ditie =
            new Splitter(ms_Pattern_ditie, new Pattern[]{ms_Pattern_underground});

    /** Splitters applied by {@link #parse(String)}, in this order. */
    static final Splitter[] ms_defaultsplitters = new Splitter[]{
        ms_splitter_guo,
        ms_splitter_sheng,
        ms_splitter_shi,
        ms_splitter_qu,
        ms_splitter_xiang,
        ms_splitter_xian,
        ms_splitter_dao,
        ms_splitter_hutong,
        ms_splitter_nongtang,
        ms_splitter_jie,
        ms_splitter_xiangg,
        ms_splitter_lu,
        ms_splitter_cun,
        ms_splitter_zhen,
        ms_splitter_hao,
        ms_splitter_point,
        ms_splitter_ditie,
        ms_splitter_jinjiao
    };

    /**
     * Collects the cut points of {@code src}.
     *
     * @param src       the address text
     * @param splitters splitters to apply, in priority order
     * @return map from match end offset to the splitter that produced it, in
     *         insertion order; a later splitter hitting the same offset
     *         replaces the earlier one but keeps its position
     */
    private static LinkedHashMap<Integer, Splitter> split(String src, Splitter[] splitters) {
        LinkedHashMap<Integer, Splitter> cutPoints = new LinkedHashMap<Integer, Splitter>();
        for (Splitter splitter : splitters) {
            Matcher m = splitter.pattern.matcher(src);
            while (m.find()) {
                cutPoints.put(m.end(), splitter);
                if (splitter.flag) {
                    // flag == true: only the first occurrence is a cut point.
                    break;
                }
            }
        }
        return cutPoints;
    }

    /**
     * Classifies the text between consecutive cut points.
     *
     * <p>Cut points are visited in map order; one that does not lie beyond
     * the current position is skipped. A piece that matches none of its
     * splitter's candidate patterns yields no segment, but the position still
     * advances past it.
     *
     * @param src         the address text
     * @param splitterdic cut points produced by {@code split}
     * @return the recognised segments, in the order they were found
     */
    private static ArrayList<Segment> recognize(String src, LinkedHashMap<Integer, Splitter> splitterdic) {
        ArrayList<Segment> segments = new ArrayList<Segment>();
        if (src.length() == 0) {
            return segments;
        }
        int index = 0;
        for (Integer key : splitterdic.keySet()) {
            // "<=" rather than "<": a cut point at the very end of the text is
            // valid (substring's end index is exclusive); excluding it dropped
            // the last component of addresses that end on a keyword.
            if (key > index && key <= src.length()) {
                String piece = src.substring(index, key);
                for (Pattern candidate : splitterdic.get(key).patterns) {
                    Segment segment = segmentRecognize(piece, candidate);
                    if (segment != null) {
                        segments.add(segment);
                        break;
                    }
                }
                index = key;
            }
        }
        return segments;
    }

    /**
     * Returns a segment for {@code src} if the whole string matches {@code r}.
     *
     * @return the segment, or {@code null} when {@code src} does not match
     */
    private static Segment segmentRecognize(String src, Pattern r) {
        Matcher m = r.matcher(src);
        return m.matches() ? new Segment(m.group(), r) : null;
    }

    /** Returns the values of all segments recognised by exactly the pattern object {@code r}. */
    private static ArrayList<String> segmentsGetStringListForPattern(ArrayList<Segment> segments, Pattern r) {
        ArrayList<String> values = new ArrayList<String>();
        for (Segment segment : segments) {
            if (segment.pattern == r) { // identity on purpose, see pattern declarations
                values.add(segment.value);
            }
        }
        return values;
    }

    /**
     * Returns the value of the first segment recognised by exactly the pattern
     * object {@code r}, or {@code null} when there is none.
     */
    private static String segmentsGetStringForPattern(ArrayList<Segment> segments, Pattern r) {
        for (Segment segment : segments) {
            if (segment.pattern == r) {
                return segment.value;
            }
        }
        return null;
    }

    /** Parses three sample addresses and prints the results. */
    public static void main(String[] args) {
        System.out.println(ChineseAddressParser.parse("北京市海淀区中关村北大街37号天龙大厦3层"));
        System.out.println(ChineseAddressParser.parse("福州市台江区群众路278号源利明珠大厦6楼"));
        System.out.println(ChineseAddressParser.parse("北京西城区百万庄大街68号6楼"));
    }

    /**
     * Parses an address into its components.
     *
     * <p>Periods and commas are removed from {@code source} first; the
     * cleaned text is what ends up in {@link ChineseAddress#source}.
     * Components that are not found stay {@code null}, except
     * {@link ChineseAddress#roads}, which is always a (possibly empty) list.
     *
     * @param source the address text; must not be {@code null}
     * @return the parsed address
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static ChineseAddress parse(String source) {
        // The original chained replace(",", "") twice, making the second call
        // a no-op. NOTE(review): presumably the second one was meant for the
        // full-width comma (U+FF0C) - confirm against the upstream C# version.
        source = source.replace(".", "").replace(",", "").replace("\uFF0C", "");

        ArrayList<Segment> segments = recognize(source, split(source, ms_defaultsplitters));

        ChineseAddress ca = new ChineseAddress();
        ca.source = source;
        ca.nation = segmentsGetStringForPattern(segments, ms_Pattern_guo);
        ca.province = segmentsGetStringForPattern(segments, ms_Pattern_province);
        ca.city = segmentsGetStringForPattern(segments, ms_Pattern_city);
        ca.district = segmentsGetStringForPattern(segments, ms_Pattern_qu);
        ca.county = segmentsGetStringForPattern(segments, ms_Pattern_county);
        ca.street = segmentsGetStringForPattern(segments, ms_Pattern_street);

        // Plain roads first, then roads recognised by the "near" pattern.
        ArrayList<String> roads = segmentsGetStringListForPattern(segments, ms_Pattern_road);
        roads.addAll(segmentsGetStringListForPattern(segments, ms_Pattern_roadnear));
        ca.roads = roads;

        ca.underground = segmentsGetStringForPattern(segments, ms_Pattern_underground);
        ca.number = segmentsGetStringForPattern(segments, ms_Pattern_number);
        ca.plaza = segmentsGetStringForPattern(segments, ms_Pattern_plaza);
        ca.ip = segmentsGetStringForPattern(segments, ms_Pattern_ip);
        ca.town = segmentsGetStringForPattern(segments, ms_Pattern_town);
        ca.village = segmentsGetStringForPattern(segments, ms_Pattern_village);
        // Zone segments are recognised by the qu/cun splitters but were never
        // copied into the result.
        ca.zone = segmentsGetStringForPattern(segments, ms_Pattern_zone);
        return ca;
    }
}

原文地址: http://www.vbox.top/38.html
原作者C#博客地址: http://blog.csdn.net/helanmouse/article/details/4096933?reload

JAVA版 中文地址 识别 切分相关推荐

  1. 中文地址 识别 切分

    c#版的 由于中文地址比较有规律 所以主要用到正则,先分割 再识别,识别率要高不少 中文地址容器: public struct ChineseAddress { public string sourc ...

  2. Java版中文分词 IKAnalyzer

    效果:中文分词统计出现次数并排序 直接看代码: import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.L ...

  3. java 人脸识别jar包_java版天网人脸识别系统,获取视频流人脸识识别推送服务器展示...

    java版天网人脸识别系统,获取视频流 进行人脸识别后推送到流媒体服务器实时展示 获取视频流 进行人脸识别后推送到red5服务器(人脸识别技术由虹软®提供) 整个系统共有两个项目组成 red5_hls ...

  4. Java 启动和停止界面_我的世界Java版1.16.4-pre2游戏下载-我的世界Java版1.16.4-pre2中文版下载...

    我的世界Java版是一款可以让玩家自由发挥自己创造力的像素沙盒类游戏,在这款游戏中玩家就是一片天地的造物主,玩家可以把现实中一切的建筑都完美的复刻在这款游戏里,如此好玩的游戏,玩家还在等什么,快来下载 ...

  5. java和基岩版区别_我的世界基岩版与Java版有什么区别?

    我的世界是一款受到非常多玩家喜爱的沙盒建造游戏,玩家可以在三维世界里做任何自己想做的事情.很多小白玩家分不清基岩版和Java版的区别.为此,小编特意收集了资料给大家分享一下本篇教程,希望能够帮助到大家 ...

  6. 我的世界java版和windows版_我的世界基岩版与Java版有什么区别?

    我的世界是一款受到非常多玩家喜爱的沙盒建造游戏,玩家可以在三维世界里做任何自己想做的事情.很多小白玩家分不清基岩版和Java版的区别.为此,小编特意收集了资料给大家分享一下本篇教程,希望能够帮助到大家 ...

  7. Java使用Tesseract-OCR文字识别(Java调用tess4j提取图片中文、英文、数字信息)

    由于需要在应用中将原本的身份认证手动提交身份信息改为用户上传身份证照自动提取信息,提升用户体验,第一时间想到阿里云等平台的收费服务及开源技术Tesseract-OCR(Tesseract-OCR提供了 ...

  8. 京东商城(360Buy)价格识别 java版

    上一篇介绍到 利用Jsoup抓取各个电商网站的信息 不过有时候会遇到价格是图片的问题 这时候你只能得到一张图片了 如果有个能把图片解析出来那该多爽啊 去百度一搜"京东(360Buy)价格识别 ...

  9. java 网络流量统计_【Java】人流量统计-动态版之摄像头识别显示

    [Java]人流量统计-动态版之视频转图识别请访问 http://ai.baidu.com/forum/topic/show/940413 本文是基于上一篇进行迭代的.本文主要是以摄像头画面进行人流量 ...

最新文章

  1. Win2008R2配置WebDeploy发布网站
  2. 5新建没有头文件_IAR新建工程
  3. java接口调试思想
  4. 决策树——CART和模型树
  5. int(a) 和 (int ) a 及 数据存储地址的探究
  6. 自动化创建tornado项目
  7. Codevs 1506 传话
  8. 如何将特定提交推送到远程,而不是之前的提交?
  9. 《JavaScript高级程序设计》笔记之'ECMAScript基础'
  10. 夏令营课程产品介绍PPT模板
  11. perl练习——FASTA格式文件中序列GC含量计算perl数组排序如何获得下标或者键
  12. ps怎么更改背景图层大小_如何利用Photoshop软件修改图片尺寸的大小
  13. 计算机网络管理员路由与交换深圳积多少分,2020年深圳积分入户,哪些加分的证书总结?...
  14. http状态码全解读
  15. C盘容量不足,磁盘满了怎么办
  16. 醉后不知天在水 满船清梦压星河。—第二十一天
  17. Desktop Computer操作系统之GUI发展
  18. Linux监控软件之 Nagios
  19. 【持续更新中】C#常见问题及其解决(VS2019)
  20. 更改西门子PLC的IP地址

热门文章

  1. 如何从零开始设计一款小程序原型?
  2. Scons安装和使用
  3. 系统性简述蓝牙以及ESP32对BLE蓝牙的使用(一)
  4. 指针数组下标JAVA_Java语言中可用下标和指针两种方式表示数组元素。
  5. 阿里云视频点播(VOD)控制台上传慢解决方案
  6. 案例拆解:元气森林,这些企业微信裂变经验值得你学习
  7. 北航软件测评中心 招聘FPGA测试工程师
  8. 如何对比手机、买手机??
  9. 计算机原理实验红绿灯转换,微机原理实验交通灯控制实验.doc
  10. PDPS软件虚拟仿真:Lock TCPF功能介绍与使用方法