pom.xml设置

    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version><scope>test</scope></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-core</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-queryparser</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-analyzers-common</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-analyzers-smartcn</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-highlighter</artifactId><version>5.3.1</version></dependency>

生成索引IndexingTest.java

package com.chabansheng.lucene;import java.nio.file.Paths;import ...;public class IndexingTest {private String ids[]={"1","2","3","4"};private String authors[]={"Jack","Marry","John","Json"};private String positions[]={"accounting","technician","salesperson","boss"};private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};private String contents[]={"If possible, use the same JRE major version at both index and search time.","When upgrading to a different JRE major version, consider re-indexing. ","Different JRE major versions may implement different versions of Unicode,","For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"};private Directory dir;/*** 生成索引* @throws Exception*/@Testpublic void index()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));//获取IndexWriter实例Analyzer analyzer=new StandardAnalyzer(); // 标准分词器IndexWriterConfig iwc=new IndexWriterConfig(analyzer);IndexWriter writer=new IndexWriter(dir, iwc);for(int i=0;i<ids.length;i++){Document doc=new Document();doc.add(new StringField("id", ids[i], Field.Store.YES));doc.add(new StringField("author",authors[i],Field.Store.YES));doc.add(new StringField("position",positions[i],Field.Store.YES));// 加权操作TextField field=new TextField("title", titles[i], Field.Store.YES);if("boss".equals(positions[i])){field.setBoost(1.5f);}doc.add(field);doc.add(new TextField("content", contents[i], Field.Store.NO));writer.addDocument(doc); // 添加文档
        }writer.close();}/*** 查询索引方式一* @throws Exception*/@Testpublic void search()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene"));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);String searchField="title";String q="java";//Term方式查询Term t=new Term(searchField,q);Query query=new TermQuery(t);TopDocs hits=is.search(query, 10);System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("author"));}reader.close();}}

查询索引方式二Searcher.java

package com.chabansheng.lucene;import java.io.StringReader;
import java.nio.file.Paths;import ...;public class Searcher {public static void search(String indexDir,String q)throws Exception{Directory dir=FSDirectory.open(Paths.get(indexDir));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);//QueryParser查询方式// Analyzer analyzer=new StandardAnalyzer(); // 标准分词器SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();QueryParser parser=new QueryParser("desc", analyzer);Query query=parser.parse(q);TopDocs hits=is.search(query, 10); //高亮行号QueryScorer scorer=new QueryScorer(query);Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);highlighter.setTextFragmenter(fragmenter);for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("city"));System.out.println(doc.get("desc"));String desc=doc.get("desc");if(desc!=null){TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc));System.out.println(highlighter.getBestFragment(tokenStream, desc));}}reader.close();}public static void main(String[] args) {String indexDir="D:\\lucene2";String q="南京文明";try {search(indexDir,q);} catch (Exception e) {// TODO Auto-generated catch block
            e.printStackTrace();}}
}

package com.chabansheng.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.junit.Test;
public class IndexingTest {
private String ids[]={"1","2","3","4"};private String authors[]={"Jack","Marry","John","Json"};private String positions[]={"accounting","technician","salesperson","boss"};private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};private String contents[]={"If possible, use the same JRE major version at both index and search time.","When upgrading to a different JRE major version, consider re-indexing. ","Different JRE major versions may implement different versions of Unicode,","For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"};private Directory dir;/** * 生成索引 * @throws Exception */@Testpublic void index()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));//获取IndexWriter实例Analyzer analyzer=new StandardAnalyzer(); // 标准分词器IndexWriterConfig iwc=new IndexWriterConfig(analyzer);IndexWriter writer=new IndexWriter(dir, iwc);for(int i=0;i<ids.length;i++){Document doc=new Document();doc.add(new StringField("id", ids[i], Field.Store.YES));doc.add(new StringField("author",authors[i],Field.Store.YES));doc.add(new StringField("position",positions[i],Field.Store.YES));// 加权操作TextField field=new TextField("title", titles[i], Field.Store.YES);if("boss".equals(positions[i])){field.setBoost(1.5f);}doc.add(field);doc.add(new TextField("content", contents[i], Field.Store.NO));writer.addDocument(doc); // 添加文档}writer.close();}
/** * 查询 * @throws Exception */@Testpublic void search()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);String searchField="title";String q="java";//Term方式查询Term t=new Term(searchField,q);Query query=new TermQuery(t);TopDocs hits=is.search(query, 10);System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("author"));}reader.close();}}

转载于:https://www.cnblogs.com/375163374lsb/p/10542985.html

lucene学习的小结相关推荐

  1. Lucene学习总结之六:Lucene打分公式的数学推导

     Lucene学习总结之六:Lucene打分公式的数学推导 在进行Lucene的搜索过程解析之前,有必要单独的一张把Lucene score公式的推导,各部分的意义阐述一下.因为Lucene的搜索 ...

  2. 集成学习原理小结(转载)

    集成学习(ensemble learning)可以说是现在非常火爆的机器学习方法了.它本身不是一个单独的机器学习算法,而是通过构建并结合多个机器学习器来完成学习任务.也就是我们常说的"博采众 ...

  3. Lucene学习总结之七:Lucene搜索过程解析

    一.Lucene搜索过程总论 搜索的过程总的来说就是将词典及倒排表信息从索引中读出来,根据用户输入的查询语句合并倒排表,得到结果文档集并对文档进行打分的过程. 其可用如下图示: 总共包括以下几个过程: ...

  4. Lucene学习笔记(1)

    Lucene学习笔记 可以搜索文本文件,理论上可以搜索任何类型的数据.只要先把数据转化为文本,就可以对数据进行索引和搜索. 使用了反向索引的机制,维护一个词/短语的表,对于每个词和短语都有一个链表描述 ...

  5. lucene学习笔记_学习Lucene

    lucene学习笔记 我目前正在与一个团队合作,开始一个基于Lucene的新项目. 虽然大多数时候我会争论使用Solr还是Elasticsearch而不是简单的Lucene,但这是一个有意识的决定. ...

  6. Lucene学习——IKAnalyzer中文分词(二)

    一.环境 1.平台:MyEclipse8.5/JDK1.5 2.框架:Lucene3.6.1/IKAnalyzer2012/htmlparser 二.目标 1.整合前面连篇文章(Lucene学习--I ...

  7. Lucene学习-深入Lucene分词器,TokenStream获取分词详细信息

    Lucene学习-深入Lucene分词器,TokenStream获取分词详细信息 在此回复牛妞的关于程序中分词器的问题,其实可以直接很简单的在词库中配置就好了,Lucene中分词的所有信息我们都可以从 ...

  8. Lucene 5.2.1 + jcseg 1.9.6中文分词索引(Lucene 学习序列2)

    Lucene 5.2.1 + jcseg 1.9.6中文分词索引(Lucene 学习序列2) jcseg是使用Java开发的一个开源的中文分词器,使用流行的mmseg算法实现.是一款独立的分词组件,不 ...

  9. Lucene 学习资料

    2019独角兽企业重金招聘Python工程师标准>>> Lucene是一个基于Java的全文索引工具包. 另外,如果是在选择全文引擎,现在也许是试试Sphinx的时候了:相比Luce ...

最新文章

  1. python3使用requests模块完成get/post/代理/自定义header/自定义Cookie
  2. EXPLAIN 命令详解
  3. 在Windows 2008 R2下部署SQL Server 2008 R2 群集(一)
  4. poj-1659-Frogs Neighborhood-(图论-是否可图)
  5. 汇编语言---键盘KeyCode值列表
  6. lambda表达式对list排序
  7. java visualvm远程监控_如何监控和诊断堆外内存使用
  8. 图书配套光盘、部分软件下载
  9. Atitit 学校工作手册attilax艾提拉总结 目录 1. 团队文化 宗旨 与使命 2 1.1. 学术教育vs 技术教育vs 技能职业教育 2 1.2. 天堂模式vs地狱模式 2 2. 组织结构
  10. 超像素、语义分割、实例分割、全景分割
  11. 拉钩教育大前端课程学习-半年总结
  12. 关于全国信息安全作品赛的了解
  13. 说明:最新谷歌AI智商不超过6岁研究来自科学院而非康内尔大学
  14. SAP 云平台 (Cloud Platform) 架构概述
  15. Vue中watch监听路由的使用场景
  16. 利用物联网更好地管理供应链
  17. Dev C++ 运行出现:源文件未编译
  18. qmail邮件系统(五)vpopmail和qmailadmin对用户的管理
  19. 强化区域产业链,优化区域产业布局,促区域经济高速发展
  20. 百度网盘满速下载文件

热门文章

  1. 【Python CheckiO 题解】Speech Module
  2. 【POJ - 2255】Tree Recovery (给定树的先序中序,输出后序)
  3. 【ZOJ - 3870】Team Formation(异或,思维)
  4. 【牛客 - 272B】Xor Path(树上操作,路径异或值)
  5. 【CodeForces - 278C 】Learning Languages(并查集,思维)
  6. python控制浏览器导出数据_Django导出数据为Excel,调用浏览器下载
  7. html 弹出加载页面,magnific popup:将整个html页面加载到弹出窗口中
  8. 最简单的控制台登录小案例,适合初学者
  9. phoenix hbase java_java jdbc访问hbase phoenix
  10. redis——HyperLogLog