lucene创建索引

1.导入jar包

2.创建实体Bean

package com.zhishang.lucene;/*** Created by Administrator on 2017/7/8.*/
/**
 * Simple value holder describing one parsed HTML page: its title, its
 * extracted text content and the location it was read from.
 */
public class HtmlBean {

    /** Page title. */
    private String title;

    /** Text content of the page. */
    private String content;

    /** Location of the page. */
    private String url;

    /** @return the page title, or {@code null} if none has been set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the page title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the page content, or {@code null} if none has been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the page content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the page location, or {@code null} if none has been set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the page location to store */
    public void setUrl(String url) {
        this.url = url;
    }
}

3.创建工具Bean

package com.zhishang.lucene;import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;import java.io.File;
import java.io.IOException;/*** Created by Administrator on 2017/7/8.*/
/**
 * Helper that turns an HTML file on disk into an {@link HtmlBean}.
 */
public class HtmlBeanUtil {

    /**
     * Parses the given file as HTML and copies its title, its extracted text
     * and its absolute path into a new {@link HtmlBean}.
     *
     * @param file the HTML file to read
     * @return the populated bean, or {@code null} when the document has no
     *         {@code <title>} element or the file could not be read
     */
    public static HtmlBean parseHtml(File file) {
        HtmlBean result = null;
        try {
            Source source = new Source(file);
            Element titleElement = source.getFirstElement(HTMLElementName.TITLE);

            // Only documents with a usable <title> element produce a bean.
            boolean hasTitle = titleElement != null && titleElement.getTextExtractor() != null;
            if (hasTitle) {
                HtmlBean parsed = new HtmlBean();
                parsed.setTitle(titleElement.getTextExtractor().toString());
                parsed.setContent(source.getTextExtractor().toString());
                parsed.setUrl(file.getAbsolutePath());
                result = parsed;
            }
        } catch (IOException e) {
            // Unreadable files are reported and then treated like "no result".
            e.printStackTrace();
        }
        return result;
    }
}

4.创建操作Bean

package com.zhishang.lucene;import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;import java.io.File;
import java.io.IOException;
import java.util.Collection;/*** Created by Administrator on 2017/7/7.*/
public class CreateIndex {public static final String indexDir = "G:/index";public static final String dataDir = "G:/data";public void createIndex(){try {Directory dir = FSDirectory.open(new File(indexDir));//分词器Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9,analyzer);config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);IndexWriter writer = new IndexWriter(dir,config);File file = new File(dataDir);RAMDirectory ramdir = new RAMDirectory();Analyzer analyzer1 = new IKAnalyzer();IndexWriterConfig config1 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer1);IndexWriter ramWriter = new IndexWriter(ramdir,config1);Collection<File> files = FileUtils.listFiles(file, TrueFileFilter.INSTANCE,TrueFileFilter.INSTANCE);int count = 0;for(File f:files){HtmlBean bean =  HtmlBeanUtil.parseHtml(f);if(bean != null){Document document = new Document();document.add(new StringField("title",bean.getTitle(), Field.Store.YES));document.add(new TextField("content",bean.getContent(), Field.Store.YES));document.add(new StringField("url",bean.getUrl(), Field.Store.YES));ramWriter.addDocument(document);count++;if (count == 50){ramWriter.close();writer.addIndexes(ramdir);ramdir = new RAMDirectory();Analyzer analyzer2 = new IKAnalyzer();IndexWriterConfig config2 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer2);ramWriter = new IndexWriter(ramdir,config2);count = 0;}}}writer.close();} catch (IOException e) {e.printStackTrace();}}
}

5.创建测试Bean

package com.zhishang.lucene;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;import java.io.File;/*** Created by Administrator on 2017/7/8.*/
/**
 * Test entry point that rebuilds the index produced by {@link CreateIndex}.
 */
public class LuceneBean {

    /**
     * Creates the index.
     *
     * <p>If the index directory already exists it is emptied first, so that the
     * run does not append to the index left over from a previous run.
     */
    @Test
    public void createIndex() {
        File file = new File(CreateIndex.indexDir);
        if (file.exists()) {
            // File.delete() alone cannot remove a directory that still contains
            // index files, so the old index has to be removed recursively.
            deleteRecursively(file);
            file.mkdirs();
        }
        CreateIndex createIndex = new CreateIndex();
        createIndex.createIndex();
    }

    /** Deletes the given file, or the given directory together with everything below it. */
    private static void deleteRecursively(File target) {
        // listFiles() returns null when target is not a directory.
        File[] children = target.listFiles();
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        target.delete();
    }
}

6.查看生成的索引文件

转载于:https://blog.51cto.com/suyanzhu/1945466

lucene创建索引相关推荐

【示例】Lucene创建索引库编程步骤
[示例]Lucene创建索引库编程步骤
lucene创建索引_Lucene概述第一部分：创建索引
lucene创建索引介绍我最近一直在与开源搜索引擎Lucene合作 . 我不是专家,但是由于我只是浏览了一些相当稀疏的文档并将应用程序从Lucene的很旧的版本迁移到了最新版本的2.4,所以我在总 ...
lucene创建索引时出错：捕捉到FileNotFoundException
lucene创建索引文件出错,如下图: 解决方案: 解锁即可代码如下: bool isUpdate = IndexReader.IndexExists(directory);if (isUpdate ...
搜索引擎学习（二）Lucene创建索引
PS:需要用到的jar包: 代码实现 1.工程结构 2.设置工程依赖的jar包 3.代码实现 /*** Lucene入门* 创建索引*/ public class CreateIndex {/*** ...
Lucene创建索引入门案例
最近在学习lucene,参考网上的资料写了一个简单搜索demo: 项目jar包: //索引关键类 <pre name="code" class="java" ...
lucene 创建索引慢的问题
网上随便一搜都能搜到很多关于lucene的教程,这里就不细展开了.简单说下过程: IndexWriterConfig indexWriterConfig = new IndexWriterConfig ...
Lucene概述第一部分：创建索引
介绍我最近一直在与开源搜索引擎Lucene合作 . 我不是专家,但是由于我只是浏览了一些相当稀疏的文档并将应用程序从Lucene的很旧的版本迁移到了最新版本的2.4,所以我在总体上很清楚. Luce ...
Lucene构建索引的原理及源代码分析
文章目录 1. Lucene是什么 2. 全文检索是什么 3. 术语 4. 创建索引过程 4.1 Lucene创建索引示例代码 4.2 分词的过程 4.2.1 原理 4.2.2 源代码 4.3 建索引 ...
lucene全文搜索之三：生成索引字段，创建索引文档（给索引字段加权）基于lucene5.5.3...
前言:上一章中我们已经实现了索引器的创建,但是我们没有索引文档,本章将会讲解如何生成字段.创建索引文档,给字段加权以及保存文档到索引器目录 lucene5.5.3集合jar包下载地址:http:// ...

lucene创建索引

1.导入jar包

2.创建实体Bean

3.创建工具Bean

4.创建操作Bean

5.创建测试Bean

6.查看生成的索引文件

lucene创建索引相关推荐

最新文章

热门文章