# Drop the "tt" index so the walkthrough starts from a clean slate.
DELETE tt

# Create "tt" with index-level analysis settings: two custom analyzers, each
# an IK tokenizer (ik_smart / ik_max_word) followed by the "my_pinyin" filter.
PUT tt
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_smart_pinyin": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_pinyin"]
        },
        "ik_max_word_pinyin": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["my_pinyin"]
        }
      },
      "filter": {
        "my_pinyin": {
          "keep_joined_full_pinyin": "true",
          "lowercase": "true",
          "none_chinese_pinyin_tokenize": "false",
          "keep_none_chinese_in_joined_full_pinyin": "true",
          "keep_original": "false",
          "remove_duplicated_term": "true",
          "keep_first_letter": "false",
          "keep_separate_first_letter": "false",
          "type": "pinyin",
          "limit_first_letter_length": "16",
          "keep_full_pinyin": "false"
        }
      }
    }
  }
}

# Exercise the "ik_max_word_pinyin" analyzer defined on "tt".
# Every console request needs its own "METHOD path" line with the JSON body
# starting on the next line; the two requests below were previously fused
# onto a single line and could not be parsed.
GET tt/_analyze
{
  "text": "翻哩",
  "analyzer": "ik_max_word_pinyin"
}

GET tt/_analyze
{
  "text": ": 2g",
  "analyzer": "ik_max_word_pinyin"
}

# Define the mapping for type "entry" on "tt". "content" and "title" use the
# custom "ik_max_word_pinyin" analyzer; every field is stored.
POST tt/entry/_mapping
{
  "entry": {
    "properties": {
      "author":     { "type": "keyword", "store": true },
      "content":    { "type": "text", "store": true, "analyzer": "ik_max_word_pinyin" },
      "createTime": { "type": "date", "store": true, "format": "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd" },
      "fondsId":    { "type": "keyword", "store": true },
      "mgtLevel":   { "type": "keyword", "store": true },
      "nodeId":     { "type": "keyword", "store": true },
      "other":      { "type": "text", "store": true },
      "tag":        { "type": "text", "store": true },
      "title":      { "type": "text", "store": true, "analyzer": "ik_max_word_pinyin" },
      "updateTime": { "type": "date", "store": true, "format": "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd" },
      "view":       { "type": "text", "store": true }
    }
  }
}

# Show the settings currently in effect on "tt".
GET tt/_settings

# Index a sample document with id 1 into tt/entry.
PUT tt/entry/1
{
  "id": 106996,
  "title": "431101199607116477(431101199607116477)",
  "content": "43110s",
  "fondsId": 299
}

# Try a different mapping for the "entry" type.
# The mapping posted earlier already pins "content" to the
# "ik_max_word_pinyin" analyzer, and document 1 introduced an "id" field that
# Elasticsearch will have mapped dynamically. Existing field mappings cannot
# be changed in place, so recreate "tt" with the same analysis settings
# before applying the new field definitions.
DELETE tt

PUT tt
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_smart_pinyin": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_pinyin"]
        },
        "ik_max_word_pinyin": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["my_pinyin"]
        }
      },
      "filter": {
        "my_pinyin": {
          "keep_joined_full_pinyin": "true",
          "lowercase": "true",
          "none_chinese_pinyin_tokenize": "false",
          "keep_none_chinese_in_joined_full_pinyin": "true",
          "keep_original": "false",
          "remove_duplicated_term": "true",
          "keep_first_letter": "false",
          "keep_separate_first_letter": "false",
          "type": "pinyin",
          "limit_first_letter_length": "16",
          "keep_full_pinyin": "false"
        }
      }
    }
  }
}

POST tt/entry/_mapping
{
  "entry": {
    "properties": {
      "id":      { "type": "keyword", "store": true },
      "name":    { "type": "text", "store": true, "analyzer": "ik_smart_pinyin" },
      "content": { "type": "text", "store": true, "analyzer": "ik_smart" }
    }
  }
}

# Inspect the resulting mapping.
GET tt/_mapping

# Drop the "test" index.
DELETE test

# Register the global template (default mapping + dynamic templates) BEFORE
# creating the index: index templates are only applied at index creation
# time, so registering it after "PUT test" would leave "test" unaffected.
# Dynamic templates: string fields named "*_text" become ik_max_word "text"
# with a keyword sub-field; all other strings become "keyword".
PUT _template/global_template
{
  "template": "*",
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "_default_": {
      "_all": { "enabled": false },
      "dynamic_templates": [
        {
          "string_as_text": {
            "match_mapping_type": "string",
            "match": "*_text",
            "mapping": {
              "type": "text",
              "analyzer": "ik_max_word",
              "search_analyzer": "ik_max_word",
              "fields": {
                "row": { "type": "keyword", "ignore_above": 256 }
              }
            }
          }
        },
        {
          "string_as_keyword": {
            "match_mapping_type": "string",
            "mapping": { "type": "keyword" }
          }
        }
      ]
    }
  }
}

# Create the "test" index; it now picks up the template above.
PUT test

# Deleting a type mapping is not supported by Elasticsearch 2.0 and later;
# the only way to discard a mapping is to delete and recreate the index.
# The original request is kept here for reference but disabled:
# DELETE test/_mapping/user

# Define the mapping for type "user". "info" is analyzed with ik_smart and
# copied into the catch-all "userAll" field.
POST test/_mapping/user
{
  "user": {
    "properties": {
      "id":      { "type": "long" },
      "userAll": { "type": "text", "analyzer": "ik_smart" },
      "name":    { "type": "keyword" },
      "info": {
        "type": "text",
        "analyzer": "ik_smart",
        "search_analyzer": "ik_smart",
        "copy_to": "userAll"
      }
    }
  }
}

# Inspect the "user" type mapping.
GET test/_mapping/user

# Index a sample user document with id 1.
PUT test/user/1
{
  "id": "1",
  "name": "张三丰",
  "info": "张三丰,名君宝(又名全一),字符元,道号三丰。"
}

# Fetch the document back by id.
GET test/user/1

# Full-text search on the "info" field.
GET test/user/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "info": "君宝" } }
      ]
    }
  }
}

# Run the sample sentence through the "ik_smart" analyzer.
POST _analyze
{
  "analyzer": "ik_smart",
  "text": "ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java开发的,并作为Apache许可条款下的开..."
}

# Run the same sentence through the "ik_max_word" analyzer for comparison.
POST _analyze
{
  "analyzer": "ik_max_word",
  "text": "ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java开发的,并作为Apache许可条款下的开..."
}

# Drop the "test" index again before the pinyin experiments.
DELETE test

# Register the global template (default mapping + dynamic templates) first:
# index templates only take effect when an index is created, so this must
# precede "PUT test" for the template to apply to it.
PUT _template/global_template
{
  "template": "*",
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "_default_": {
      "_all": { "enabled": false },
      "dynamic_templates": [
        {
          "string_as_text": {
            "match_mapping_type": "string",
            "match": "*_text",
            "mapping": {
              "type": "text",
              "analyzer": "ik_max_word",
              "search_analyzer": "ik_max_word",
              "fields": {
                "row": { "type": "keyword", "ignore_above": 256 }
              }
            }
          }
        },
        {
          "string_as_keyword": {
            "match_mapping_type": "string",
            "mapping": { "type": "keyword" }
          }
        }
      ]
    }
  }
}

# Create the "test" index so that it inherits the template.
PUT test

# Pinyin search test on a "twitter" index.
# Fixes relative to the original notes:
#   * every request now starts on its own line (several were fused onto the
#     closing brace of the previous body, e.g. "DELETE twitterPUT twitter");
#   * "content" is mapped as "text": a "keyword" field does not accept an
#     "analyzer" parameter, so the mapping request would be rejected;
#   * the search targets the "content.pinyin" sub-field, which is the one
#     analyzed by "ik_pinyin_analyzer" (same pattern as the
#     "ProductCName.pinyin" query further down in this file).
DELETE twitter

# Create the index with a custom analyzer: ik_smart tokenizer followed by the
# "my_pinyin" and "word_delimiter" token filters.
PUT twitter
{
  "index": {
    "analysis": {
      "analyzer": {
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_pinyin", "word_delimiter"]
        }
      },
      "filter": {
        "my_pinyin": {
          "type": "pinyin",
          "first_letter": "prefix",
          "padding_char": " "
        }
      }
    }
  }
}

# Mapping for type "doc"; "content" gets a "pinyin" sub-field.
PUT twitter/doc/_mapping
{
  "doc": {
    "properties": {
      "type":      { "type": "keyword" },
      "name":      { "type": "text" },
      "user_name": { "type": "keyword" },
      "email":     { "type": "keyword" },
      "content": {
        "type": "text",
        "analyzer": "ik_smart",
        "fields": {
          "pinyin": {
            "type": "text",
            "store": false,
            "term_vector": "with_positions_offsets",
            "analyzer": "ik_pinyin_analyzer",
            "boost": 10
          }
        }
      },
      "tweeted_at": { "type": "date" }
    }
  }
}

# Sample user document.
PUT twitter/doc/user-kimchy
{
  "type": "user",
  "name": "Shay Banon",
  "user_name": "kimchy",
  "email": "shay@kimchy.com"
}

# Sample tweet document with Chinese content.
PUT twitter/doc/tweet-4
{
  "type": "tweet",
  "user_name": "kimchy",
  "tweeted_at": "2017-10-24T09:00:00Z",
  "content": "明星生孩子压力大?高圆圆钟丽缇心态不一,婚后生活大不同 "
}

# Search tweets by pinyin input.
GET twitter/_search
{
  "query": {
    "bool": {
      "must": {
        "match": { "content.pinyin": "gaoyuanyuan" }
      },
      "filter": {
        "match": { "type": "tweet" }
      }
    }
  }
}

# Mapping for type "user" on "test" whose "userAll" field has a "pinyin"
# sub-field analyzed by "ik_pinyin_analyzer".
# "test" is created with an empty body in this file, so that analyzer is not
# defined on it and the mapping request would fail with an unknown-analyzer
# error. New analyzers can only be added while an index is closed, hence the
# close -> update settings -> open sequence before the mapping call.
POST test/_close

PUT test/_settings
{
  "analysis": {
    "analyzer": {
      "ik_pinyin_analyzer": {
        "type": "custom",
        "tokenizer": "ik_smart",
        "filter": ["my_pinyin", "word_delimiter"]
      }
    },
    "filter": {
      "my_pinyin": {
        "type": "pinyin",
        "first_letter": "prefix",
        "padding_char": " "
      }
    }
  }
}

POST test/_open

POST test/_mapping/user
{
  "user": {
    "properties": {
      "id": { "type": "long" },
      "userAll": {
        "type": "text",
        "analyzer": "ik_smart",
        "fields": {
          "pinyin": {
            "type": "text",
            "store": false,
            "term_vector": "with_positions_offsets",
            "analyzer": "ik_pinyin_analyzer",
            "boost": 10
          }
        }
      },
      "name": { "type": "keyword" },
      "info": {
        "type": "text",
        "analyzer": "ik_smart",
        "search_analyzer": "ik_smart",
        "copy_to": "userAll"
      }
    }
  }
}

# Inspect the "user" type mapping.
GET test/_mapping/user

# Index a sample user document with id 5.
PUT test/user/5
{
  "id": "5",
  "name": "张三丰",
  "info": "名君宝,字符元。三"
}

# Fetch the document that was just indexed. The original notes requested
# id 1, which no longer exists once "test" has been deleted and recreated.
GET test/user/5

# Full-text search on the "info" field.
GET test/user/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "info": "三丰" } }
      ]
    }
  }
}

# Analyze a Chinese sentence with the "pinyin" analyzer.
POST _analyze
{
  "analyzer": "pinyin",
  "text": "张三丰,名君宝(又名全一),字符元,道号三丰。"
}

# Analyze the sample sentence with the "ik_max_word" analyzer.
POST _analyze
{
  "analyzer": "ik_max_word",
  "text": "ElasticSearch是一个基于Lucene的搜索服务器。它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java开发的,并作为Apache许可条款下的开..."
}

# Search across all indices. This request was previously fused onto the
# closing brace of the body above; it must start on its own line.
GET _search

# Analyze a phrase with the "pinyin" analyzer.
POST /_analyze
{
  "analyzer": "pinyin",
  "text": "中华人民共和国国歌"
}

# Register a global template that defines "pinyin_analyzer", backed by a
# custom "my_pinyin" tokenizer.
# An index template needs an index pattern ("template", as used by the other
# template in this file) and its analysis configuration must sit under
# "settings"; the original body had neither, so the request could not
# register a usable template.
PUT _template/global_pinyin_template
{
  "template": "*",
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "pinyin_analyzer": {
            "tokenizer": "my_pinyin"
          }
        },
        "tokenizer": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

# Drop the "index_name" index.
DELETE index_name

# Create "index_name" with the custom "ik_pinyin_analyzer": ik_smart
# tokenizer followed by the "my_pinyin" and "word_delimiter" token filters.
PUT index_name
{
  "index": {
    "analysis": {
      "analyzer": {
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_pinyin", "word_delimiter"]
        }
      },
      "filter": {
        "my_pinyin": {
          "type": "pinyin",
          "first_letter": "prefix",
          "padding_char": " "
        }
      }
    }
  }
}

# Set the mapping for type "app". "ProductCName" is a keyword with a
# "pinyin" text sub-field; the other two fields are ik_smart-analyzed text.
PUT index_name/app/_mapping
{
  "app": {
    "properties": {
      "ProductCName": {
        "type": "keyword",
        "fields": {
          "pinyin": {
            "type": "text",
            "store": false,
            "term_vector": "with_positions_offsets",
            "analyzer": "ik_pinyin_analyzer",
            "boost": 10
          }
        }
      },
      "ProductEName": { "type": "text", "analyzer": "ik_smart" },
      "Description":  { "type": "text", "analyzer": "ik_smart" }
    }
  }
}

# Index a test document with id 1.
PUT index_name/app/1
{
  "ProductCName": "口红世家",
  "ProductEName": "Red History",
  "Description": "口红真是很棒的东西呢"
}

# Query the pinyin sub-field of "ProductCName".
GET index_name/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "ProductCName.pinyin": "kou" } }
      ]
    }
  }
}

中文,拼音分词使用练习记录相关推荐

  1. debian 10 buster 安装配置 elastic search 和 中文, 拼音分词

    debian 10 buster 安装配置 es 和 中文, 拼音分词 安装 测试 配置 分词 IK 分词器 拼音分词 一个完整的动态映射模板(包含geo, pinyin, IK) 安装 1, 安装j ...

  2. 记一次springboot2.3.*项目整合elasticsearch7.6.2实现中文拼音分词搜索

    一.elasticsearch官网下载:Elasticsearch 7.6.2 | Elastic 二.拼音.ik.繁简体转换插件安装 ik分词:GitHub - medcl/elasticsearc ...

  3. Elasticsearch 5.5.1 中文/拼音分词 亲测有效

    所有不说明elastic 版本的博客都是耍流氓 . --某码农 原文链接 版本如题.拼音和中文分词一起的整个测试流程如下: 预备 删除 index DELETE /index_name/ { } 创建 ...

  4. es拼音分词 大帅哥_SpringBoot集成Elasticsearch 进阶,实现中文、拼音分词,繁简体转换...

    Elasticsearch 分词 分词分为读时分词和写时分词. 读时分词发生在用户查询时,ES 会即时地对用户输入的关键词进行分词,分词结果只存在内存中,当查询结束时,分词结果也会随即消失.而写时分词 ...

  5. solr 中文分词器IKAnalyzer和拼音分词器pinyin

    solr分词过程: Solr Admin中,选择Analysis,在FieldType中,选择text_en 左边框输入 "冬天到了天气冷了小明不想上学去了",点击右边的按钮,发现 ...

  6. elasticsearch-analysis-ik中文分词插件安装及配置Ik自定义词典+拼音分词

    在Elasticsearch中默认的分词器对中文的支持不好,会分隔成一个一个的汉字.而IK分词器对中文的支持比较好,主要有两种模式"ik_smart"和"ik_max_w ...

  7. Solr之——配置中文分词IKAnalyzer和拼音分词pinyinAnalyzer

    转载请注明出处:http://blog.csdn.net/l1028386804/article/details/70200378 在上一篇博文<CentOS安装与配置Solr6.5>中, ...

  8. 开源项目在线化 中文繁简体转换/敏感词/拼音/分词/汉字相似度/markdown 目录

    前言 以前在 github 上自己开源了一些项目.碍于技术与精力,大部分项目都是 java 实现的. 这对于非 java 开发者而言很不友好,对于不会编程的用户更加不友好. 为了让更多的人可以使用到这 ...

  9. es自定义拼音分词器处理中文拼音排序问题

    1.先上结论,如下mapping可以解决es拼音排序问题 {"settings": {"number_of_shards": "3",&qu ...

最新文章

  1. 第六课.模型评估与模型选择
  2. nginx 没有sbin目录_CentOS7下Nginx+ModSecurity配置、安装、测试教程
  3. linux 压缩和解压缩(zip/unzip)
  4. 3、MySQL 8.0.20在Linux(centos 8)上搭建主从复制
  5. Scala集合:ListBuffer可变集合的head/tail/last/init方法
  6. 【牛客刷题记录】2021-03-10
  7. 如何理解HTML结构的语义化?
  8. 请问:如何在C#简单分布式程序的数据层中为其它层留出很好的接口?????...
  9. 庆祝.Net BI团队成立!
  10. python基本定制之__new__, __init__
  11. 深入解读Linux进程调度系列(4)——调度与时钟中断
  12. 如何获得Android手机的软件安装列表
  13. resetuser.php,Php开发过程中不常碰到的error (2.25更新)
  14. 【微信小程序】flex布局
  15. MSDN visual studio 2010简体中文旗舰版,专业版下载(内置正版密钥)
  16. 查看本机mysql安装信息
  17. cursor: not-allowed; readonly的升级版,鼠标经过时,显示为禁止图标
  18. TearDrop代码编程与SOCKET应用实例
  19. 零基础教程-如何快速将EXCEL表格数据拆分成多个文件
  20. 树莓派存储方案_还在用笨重的NAS存储服务器?你可以自己动手用树莓派DIY一个...

热门文章

  1. Can you stand on the shoulders of giants?
  2. 什么是聚簇索引与非聚簇索引
  3. 【新手入门Python必看】1000+常用Python库一览
  4. 微信小程序canvas商品分享海报
  5. 奇葩公司:520不发朋友圈秀恩爱就要劝退!
  6. 去金坛长荡湖吃船菜,原来1947年就有前辈在努力发展中国的计算机产业
  7. 极光推送集成APP和web服务
  8. MATLAB如何解决failed to start a parallel pool问题
  9. Windows Update Downloader:系统补丁离线打
  10. 核磁共振、顺磁共振、磁共振成像这些原理你都了解吗