lucene的基础入门

一创建maven项目 lucene_1

引入依赖:

<dependencies>
    <!-- MySQL JDBC driver -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.6</version>
    </dependency>

    <!-- JUnit 4 -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>

    <!-- IK analyzer for Chinese word segmentation -->
    <dependency>
        <groupId>com.janeluo</groupId>
        <artifactId>ikanalyzer</artifactId>
        <version>2012_u6</version>
    </dependency>

    <!-- Lucene analyzers -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>4.10.3</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>4.10.3</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>4.10.3</version>
    </dependency>
</dependencies>

二准备数据

1.创建表book

-- Book table read by BookDao; one row becomes one Lucene document.
CREATE TABLE `book` (
    `id`          int(11)      DEFAULT NULL,
    `name`        varchar(192) DEFAULT NULL,
    `price`       float        DEFAULT NULL,
    `pic`         varchar(96)  DEFAULT NULL,
    `description` text
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

2.编写Book实体类

package domain;public class Book {private Integer id;private String name;private Float price;private String pic;private String description;@Overridepublic String toString() {return "Book{" +"id=" + id +", name='" + name + '\'' +", price=" + price +", pic='" + pic + '\'' +", description='" + description + '\'' +'}';}public Integer getId() {return id;}public void setId(Integer id) {this.id = id;}public String getName() {return name;}public void setName(String name) {this.name = name;}public Float getPrice() {return price;}public void setPrice(Float price) {this.price = price;}public String getPic() {return pic;}public void setPic(String pic) {this.pic = pic;}public String getDescription() {return description;}public void setDescription(String description) {this.description = description;}
}

3.编写Dao,查询所有图书信息

public class BookDao {public List<Book> findAll(){List<Book> bookList = new ArrayList<>();Connection conn = null;PreparedStatement pst = null;ResultSet rs = null;String url = "jdbc:mysql:///mybatis1";String user = "root";String root = "root";//加载驱动try {Class.forName("com.mysql.jdbc.Driver");} catch (ClassNotFoundException e) {e.printStackTrace();}try {//获得链接conn = DriverManager.getConnection(url, user, root);//获得执行者对象pst = conn.prepareStatement("SELECT  * FROM  Book");//获得结果集rs = pst.executeQuery();while (rs.next()) {//封装结果集Book book = new Book();book.setId(rs.getInt("id"));book.setName(rs.getString("name"));book.setPic(rs.getString("pic"));book.setPrice(rs.getFloat("price"));book.setDescription(rs.getString("description"));bookList.add(book);}} catch (SQLException e) {e.printStackTrace();}finally {if (rs != null) {try {rs.close();} catch (SQLException e) {e.printStackTrace();}}if (pst != null) {try {pst.close();} catch (SQLException e) {e.printStackTrace();}}if (conn != null) {try {conn.close();} catch (SQLException e) {e.printStackTrace();}}}return bookList;}
}

三创建索引

public class TestCreateIndex {@Testpublic void test() throws IOException {// 创建分词器Analyzer analyzer = new StandardAnalyzer();//指定索引的位置：创建索引库的位置对象FSDirectory fsDirectory = FSDirectory.open(new File("f:/dic"));//创建索引输出流配置对象: 版本号,分词器对象IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_3,analyzer);//索引输出流: 索引库位置, 索引输出流配置对象IndexWriter indexWriter = new IndexWriter(fsDirectory,writerConfig);//获取所有数据BookDao bookDao = new BookDao();List<Book> bookList = bookDao.findAll();//创建doc 文档对象列表List<Document> docList = new ArrayList<>();//一条数据对应一个文档for (Book book : bookList) {//一个列对应一个域对象Document doc = new Document();TextField idField = new TextField("id",String.valueOf(book.getId()), Field.Store.YES);TextField nameField = new TextField("name",book.getName(), Field.Store.YES);TextField picField = new TextField("pic",book.getPic(), Field.Store.YES);TextField priceField = new TextField("price",String.valueOf(book.getPrice()), Field.Store.YES);TextField descriptionField = new TextField("description",book.getDescription(), Field.Store.YES);//将域对象添加到文档中
            doc.add(idField);doc.add(nameField);doc.add(picField);doc.add(priceField);doc.add(descriptionField);docList.add(doc);}//通过索引写到索引库for (Document document : docList) {indexWriter.addDocument(document);}//提交
        indexWriter.commit();//关闭liu
        indexWriter.close();}
}

四查看测试索引

准备:lukeall-4.10.3.jar(本人直接放在f盘根目录)

在创建索引时的位置为f:/dic

运行jar

填写索引库的位置:查看索引库

五使用索引查询

public class TestSearchIndex {@Testpublic void test01() throws Exception {//创建分词器Analyzer analyzer = new StandardAnalyzer();//创建指定的索引库FSDirectory directory = FSDirectory.open(new File("f:/dic"));//提供查询关键字//创建一个查询分析器: 默认查询域   分词器QueryParser queryParser = new QueryParser("name",analyzer);//查询条件Query query = queryParser.parse("description:java");//查询输入流IndexReader indexReader = IndexReader.open(directory);//创建查询对象IndexSearcher indexSearcher = new IndexSearcher(indexReader);//文档对象编号TopDocs topDocs = indexSearcher.search(query, 2);ScoreDoc[] scoreDocs = topDocs.scoreDocs;for (ScoreDoc scoreDoc : scoreDocs) {//获取每个文档编号int docId = scoreDoc.doc;Document doc = indexSearcher.doc(docId);//从文档中获得每个域值String id = doc.get("id");String name = doc.get("name");String pic = doc.get("pic");String price = doc.get("price");String description = doc.get("description");System.out.println(id+"----id");System.out.println(name+"----name");System.out.println(pic+"----pic");System.out.println(price+"----price");System.out.println(description+"----description");System.out.println("***********************************************");}indexReader.close();}
}

六删除索引

/**
 * Deletes documents from the index in {@code f:/dic}.
 */
public class TestDeleteIndex {

    /**
     * Deletes every document whose {@code id} field contains the term {@code 2}, then commits.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void delete() throws Exception {
        // Analyzer for the writer configuration.
        Analyzer analyzer = new StandardAnalyzer();
        // Writer configuration: Lucene version plus analyzer.
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

        // try-with-resources releases the writer and directory even if the delete throws.
        try (FSDirectory directory = FSDirectory.open(new File("f:/dic"));
             IndexWriter indexWriter = new IndexWriter(directory, writerConfig)) {

            // Term identifying the documents to delete.
            Term term = new Term("id", "2");

            // Targeted delete. Per the original author's note, this removes the
            // matching documents only, not the index entries.
            indexWriter.deleteDocuments(term);

            // To wipe everything instead (documents and index entries, per the
            // original note), call indexWriter.deleteAll().

            indexWriter.commit();
        }
    }
}

七更新索引

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;import java.io.File;
import java.io.IOException;public class TestUpdateIndex {@Testpublic void update() throws IOException {//更新索引库不会删除原来的索引,会删除原来的文档//分词器Analyzer analyzer = new StandardAnalyzer();//索引库FSDirectory directory = FSDirectory.open(new File("f:/dic"));//输出流配置文件IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_3,analyzer);//输出流IndexWriter indexWriter = new IndexWriter(directory,writerConfig);//词元Term term = new Term("id","1");Document doc = new Document();TextField idField = new TextField("id",String.valueOf(6), Field.Store.YES);TextField nameField = new TextField("name","平凡的世界", Field.Store.YES);TextField priceField = new TextField("price",String.valueOf(9.9), Field.Store.YES);TextField picField = new TextField("pic","15522.jpg", Field.Store.YES);TextField descriptionField = new TextField("description","平凡的世界,平凡的你我", Field.Store.YES);doc.add(idField);doc.add(nameField);doc.add(picField);doc.add(priceField);doc.add(descriptionField);indexWriter.updateDocument(term,doc);indexWriter.commit();indexWriter.close();}
}

八用IK分词器进行分词

import dao.BookDao;
import domain.Book;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;public class TestCreateIndexIK {@Testpublic void testIK() throws Exception {//创建ik中文分词器Analyzer ikAnalyzer = new IKAnalyzer();//创建索引库FSDirectory directory = FSDirectory.open(new File("f:/dic"));//创建索引输出流配置对象IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_3, ikAnalyzer);//创建索引输出流对象IndexWriter indexWriter = new IndexWriter(directory, writerConfig);//准备数据BookDao bookDao = new BookDao();List<Book> bookList = bookDao.findAll();List<Document> documentList = new ArrayList<>();for (Book book : bookList) {//一条数据对应一个文件Document doc = new Document();//一个文件有多个域/**            id     name    pic     price       descripttion*   分词      N       Y       N       Y           Y*   索引      Y       Y       N       Y           Y*   存储      Y       Y       Y       Y           N* */StringField idField = new StringField("id", String.valueOf(book.getId()), Field.Store.YES);TextField nameField = new TextField("name", book.getName(), Field.Store.YES);StoredField picField = new StoredField("pic", book.getPic());TextField priceField = new TextField("price", String.valueOf(book.getPrice()), Field.Store.YES);TextField descriptionField = new TextField("description", book.getDescription(), Field.Store.YES);//将域对象添加到文档中
            doc.add(idField);doc.add(nameField);doc.add(picField);doc.add(priceField);doc.add(descriptionField);documentList.add(doc);}for (Document document : documentList) {indexWriter.addDocument(document);}indexWriter.commit();indexWriter.close();}
}

Field常用类型

Field类	数据类型	Analyzed 是否分词	Indexed 是否索引	Stored 是否存储	说明
StringField(FieldName, FieldValue,Store.YES)	字符串	N	Y	Y或N	这个Field用来构建一个字符串Field，但是不会进行分词，会将整个串存储在索引中，比如(订单号,身份证号等) 是否存储在文档中用Store.YES或Store.NO决定
LongField(FieldName, FieldValue,Store.YES)	Long型	Y	Y	Y或N	这个Field用来构建一个Long数字型Field，进行分词和索引，比如(价格) 是否存储在文档中用Store.YES或Store.NO决定
StoredField(FieldName, FieldValue)	重载方法，支持多种类型	N	N	Y	这个Field用来构建不同类型Field 不分析，不索引，但要Field存储在文档中
TextField(FieldName, FieldValue, Store.NO)或 TextField(FieldName, reader)	字符串或流	Y	Y	Y或N	如果是一个Reader, lucene猜测内容比较多,会采用Unstored的策略.

九扩展中文词库

引入三个配置文件

IKAnalyzer.cfg.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>

    <!-- Extension dictionary: list your own dictionary files here -->
    <entry key="ext_dict">ext.dic;</entry>

    <!-- Extension stop-word dictionary: list your own stop-word files here -->
    <entry key="ext_stopwords">stopword.dic;</entry>
</properties>

ext.dic

编程思想

stopword.dic 填一些停止词

a
an
and
are
as
at
be

地的得啊嗯

再执行IK中文分词时就会加上扩展中文词库进行分词

每次创建索引库时,请删除上次创建的内容

转载于:https://www.cnblogs.com/Cyan-W/p/9938510.html

lucene的基础入门相关推荐

Elasticsearch 基础入门
原文地址:Elasticsearch 基础入门博客地址:http://www.extlight.com 一.什么是 ElasticSearch ElasticSearch是一个基于 Lucene 的 ...
2020年关于Linux的知识技术合集（基础入门到高级进阶）
前言本文介绍下Linux如何从入门开始到高级进阶的学习路线. 整个体系专注于服务器后台开发,知识点包括C/C++,Linux,Nginx,ZeroMQ,MySQL,Redis,fastdfs,Mon ...
企业级搜索引擎solr零基础入门
企业级搜索引擎solr零基础入门(持续更新中-) 1.下载与安装官网地址:http://lucene.apache.org/solr/ 将下载的压缩包直接解压即可 2.服务启动 2.1进入bin目录 ...
用python循环语句求素数_Python基础入门_3条件语句和迭代循环
Python 基础入门前两篇: Python 基础入门--简介和环境配置 Python基础入门_2基础语法和变量类型这是第三篇内容,主要简单介绍条件语句和迭代循环语句,内容也比较简单,目录如下: 条 ...
MAYA 2022基础入门学习教程
流派:电子学习| MP4 |视频:h264,1280×720 |音频:AAC,48.0 KHz 语言:英语+中英文字幕(根据原英文字幕机译更准确)|大小解压后:3.41 GB |时长:4.5小时包含 ...
Blender 3.0基础入门学习教程 Introduction to Blender 3.0
成为Blender通才,通过这个基于项目的循序渐进课程学习所有主题的基础知识. 你会学到什么教程获取:Blender 3.0基础入门学习教程 Introduction to Blender 3.0- ...
三维地形制作软件 World Machine 基础入门学习教程
<World Machine课程>涵盖了你需要的一切,让你有一个坚实的基础来构建自己的高质量的电影或视频游戏地形. 你会学到什么为渲染或游戏开发创建高分辨率.高细节的地形. 基于Worl ...
SketchUp Pro 2021基础入门学习视频教程
SketchUp Pro 2021基础入门学习视频教程 1280X720 MP4 |视频:h264,1280×720 |音频:AAC,44.1 KHz,2 Ch 流派:电子学习|语言:英语+中文字幕( ...
Maya基础入门学习教程
Maya基础入门学习教程视频:.MKV, 1280x720, 共57节课时长 4小时25分钟,3GB 语言:英语+中文字幕(根据原英文字幕机译更准确)+原英文字幕指导老师:Shane Whitt ...

lucene的基础入门

一创建maven项目 lucene_1

二准备数据

三创建索引

四查看测试索引

五使用索引查询

六删除索引

七更新索引

八用IK分词器进行分词

九扩展中文词库

lucene的基础入门相关推荐

最新文章

热门文章

lucene的基础入门

一 创建maven项目 lucene_1

二 准备数据

三 创建索引

四 查看测试索引

五 使用索引查询

六 删除索引

七 更新索引

八 用IK分词器进行分词

九 扩展中文词库

lucene的基础入门相关推荐

最新文章

热门文章

一创建maven项目 lucene_1

二准备数据

三创建索引

四查看测试索引

五使用索引查询

六删除索引

七更新索引

八用IK分词器进行分词

九扩展中文词库