全文检索:使用最新 Lucene.Net 3.0.3 + 最新盘古分词 PanGu 2.4 的 .NET 实例
开发环境:VS2015,WinForm 程序。
1. 首先下载所需的 DLL(文章末尾统一提供程序下载地址,压缩包内均已包含)。
2. 配置盘古分词(PanGu)的参数;也可以不配置,直接采用默认值。
3. 创建索引,并将索引存放到本地。
4. 根据关键字查询本地索引。
5. 取得查询结果并展示。
以上是主要步骤,下面贴出主要代码,拿来即可使用。
Form1.Designer.cs
namespace lucuneTest
{
    partial class Form1
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.btnSearch = new System.Windows.Forms.Button();
            this.panel1 = new System.Windows.Forms.Panel();
            this.textBox2 = new System.Windows.Forms.TextBox();
            this.panel2 = new System.Windows.Forms.Panel();
            this.txtWords = new System.Windows.Forms.TextBox();
            this.btnCutWords = new System.Windows.Forms.Button();
            this.txtWords2 = new System.Windows.Forms.TextBox();
            this.txtResult = new System.Windows.Forms.TextBox();
            this.label3 = new System.Windows.Forms.Label();
            this.label4 = new System.Windows.Forms.Label();
            this.panel1.SuspendLayout();
            this.panel2.SuspendLayout();
            this.SuspendLayout();
            //
            // btnSearch
            //
            this.btnSearch.Location = new System.Drawing.Point(413, 48);
            this.btnSearch.Name = "btnSearch";
            this.btnSearch.Size = new System.Drawing.Size(75, 23);
            this.btnSearch.TabIndex = 3;
            this.btnSearch.Text = "查询";
            this.btnSearch.UseVisualStyleBackColor = true;
            this.btnSearch.Click += new System.EventHandler(this.btnSearch_Click);
            //
            // panel1
            //
            this.panel1.Controls.Add(this.label3);
            this.panel1.Controls.Add(this.label4);
            this.panel1.Controls.Add(this.txtWords2);
            this.panel1.Controls.Add(this.btnCutWords);
            this.panel1.Controls.Add(this.txtWords);
            this.panel1.Controls.Add(this.textBox2);
            this.panel1.Controls.Add(this.btnSearch);
            this.panel1.Dock = System.Windows.Forms.DockStyle.Top;
            this.panel1.Location = new System.Drawing.Point(0, 0);
            this.panel1.Name = "panel1";
            this.panel1.Size = new System.Drawing.Size(884, 92);
            this.panel1.TabIndex = 5;
            //
            // textBox2
            //
            this.textBox2.Location = new System.Drawing.Point(36, 29);
            this.textBox2.Name = "textBox2";
            this.textBox2.Size = new System.Drawing.Size(328, 21);
            this.textBox2.TabIndex = 5;
            this.textBox2.Text = "天龙八部";
            //
            // panel2
            //
            this.panel2.Controls.Add(this.txtResult);
            this.panel2.Dock = System.Windows.Forms.DockStyle.Fill;
            this.panel2.Location = new System.Drawing.Point(0, 92);
            this.panel2.Name = "panel2";
            this.panel2.Size = new System.Drawing.Size(884, 378);
            this.panel2.TabIndex = 6;
            //
            // txtWords
            //
            this.txtWords.Location = new System.Drawing.Point(548, 12);
            this.txtWords.Multiline = true;
            this.txtWords.Name = "txtWords";
            this.txtWords.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtWords.Size = new System.Drawing.Size(324, 38);
            this.txtWords.TabIndex = 7;
            //
            // btnCutWords
            //
            this.btnCutWords.Location = new System.Drawing.Point(413, 19);
            this.btnCutWords.Name = "btnCutWords";
            this.btnCutWords.Size = new System.Drawing.Size(75, 23);
            this.btnCutWords.TabIndex = 8;
            this.btnCutWords.Text = "分词--》";
            this.btnCutWords.UseVisualStyleBackColor = true;
            this.btnCutWords.Click += new System.EventHandler(this.btnCutWords_Click);
            //
            // txtWords2
            //
            this.txtWords2.Location = new System.Drawing.Point(548, 51);
            this.txtWords2.Multiline = true;
            this.txtWords2.Name = "txtWords2";
            this.txtWords2.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtWords2.Size = new System.Drawing.Size(324, 38);
            this.txtWords2.TabIndex = 9;
            //
            // txtResult
            //
            this.txtResult.Dock = System.Windows.Forms.DockStyle.Fill;
            this.txtResult.Location = new System.Drawing.Point(0, 0);
            this.txtResult.Multiline = true;
            this.txtResult.Name = "txtResult";
            this.txtResult.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtResult.Size = new System.Drawing.Size(884, 378);
            this.txtResult.TabIndex = 8;
            //
            // label3
            //
            this.label3.AutoSize = true;
            this.label3.Location = new System.Drawing.Point(513, 24);
            this.label3.Name = "label3";
            this.label3.Size = new System.Drawing.Size(29, 12);
            this.label3.TabIndex = 11;
            this.label3.Text = "盘古";
            //
            // label4
            //
            this.label4.AutoSize = true;
            this.label4.Location = new System.Drawing.Point(513, 51);
            this.label4.Name = "label4";
            this.label4.Size = new System.Drawing.Size(29, 12);
            this.label4.TabIndex = 10;
            this.label4.Text = "标准";
            //
            // Form1
            //
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 12F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(884, 470);
            this.Controls.Add(this.panel2);
            this.Controls.Add(this.panel1);
            this.Name = "Form1";
            this.Text = "Form1";
            this.panel1.ResumeLayout(false);
            this.panel1.PerformLayout();
            this.panel2.ResumeLayout(false);
            this.panel2.PerformLayout();
            this.ResumeLayout(false);
        }

        #endregion

        // Query button: wired to btnSearch_Click.
        private System.Windows.Forms.Button btnSearch;
        // Top panel holding the keyword box, buttons and segmentation outputs.
        private System.Windows.Forms.Panel panel1;
        // Fill panel holding the search result box.
        private System.Windows.Forms.Panel panel2;
        // Keyword input box.
        private System.Windows.Forms.TextBox textBox2;
        // Segmentation button: wired to btnCutWords_Click.
        private System.Windows.Forms.Button btnCutWords;
        // Output box placed next to the "盘古" label.
        private System.Windows.Forms.TextBox txtWords;
        // Output box placed next to the "标准" label.
        private System.Windows.Forms.TextBox txtWords2;
        // Multiline box that fills panel2.
        private System.Windows.Forms.TextBox txtResult;
        private System.Windows.Forms.Label label3;
        private System.Windows.Forms.Label label4;
    }
}
View Code
Form1.cs
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Windows.Forms;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using PanGu;
using PanGu.HighLight;
using LN = Lucene.Net;

namespace lucuneTest
{
    /// <summary>
    /// Demo form: builds a local Lucene.Net index with the PanGu analyzer at startup,
    /// then lets the user segment a keyword and run a multi-field search against the index.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Lazily opened index directory shared by all writers/searchers of this form.
        /// Opened on first use by <see cref="direcotry"/> and released in <see cref="OnFormClosed"/>.
        /// </summary>
        private LN.Store.Directory indexDirectory;

        /// <summary>
        /// Results of the most recent search; cleared at the start of every search.
        /// </summary>
        protected IList<Article> list = new List<Article>();

        /// <summary>
        /// Initializes the UI, loads the PanGu configuration and (re)builds the demo index.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            // Point PanGu at its XML configuration file before any segmentation happens.
            PanGu.Segment.Init(PanGuXmlPath);

            createIndex();
        }

        /// <summary>
        /// Releases the cached index directory once the form is closed.
        /// </summary>
        /// <param name="e">Event data forwarded to the base implementation.</param>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            if (indexDirectory != null)
            {
                indexDirectory.Dispose();
                indexDirectory = null;
            }

            base.OnFormClosed(e);
        }

        /// <summary>
        /// Rebuilds the demo index from scratch: five sample documents per iteration, 100 iterations.
        /// </summary>
        void createIndex()
        {
            Stopwatch sw = Stopwatch.StartNew();

            // Third argument "true" recreates the index; use "false" to append to an existing one.
            // The using block guarantees the write lock is released even if indexing throws.
            using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
            {
                for (int i = 1; i < 101; i++)
                {
                    string date = DateTime.Now.AddDays(i).ToString("yyyy-MM-dd");

                    AddIndex(writer, "我的标题" + i, i + "这是我的标题啦" + i, date);
                    AddIndex(writer, "射雕英雄传作者金庸" + i, i + "我是欧阳锋" + i, date);
                    AddIndex(writer, "天龙八部12" + i, i + "慕容废墟,上官静儿,打撒飞艾丝凡爱上,虚竹" + i, date);
                    AddIndex(writer, "倚天屠龙记12" + i, i + "张无忌机" + i, date);
                    AddIndex(writer, "三国演义" + i, i + "刘备,张飞,关羽还有谁来着 忘记啦" + i, date);
                }

                writer.Optimize();
            }

            sw.Stop();
            string time = ((double)sw.ElapsedMilliseconds / 1000).ToString();
            Console.WriteLine("创建100条记录需要时长:" + time + "秒");
        }

        /// <summary>
        /// Adds one document with "title", "content" and "addtime" fields to the index.
        /// </summary>
        /// <param name="writer">Open index writer that receives the document.</param>
        /// <param name="title">Title text; stored and analyzed.</param>
        /// <param name="content">Body text; stored and analyzed.</param>
        /// <param name="date">Date string; stored and indexed as a single un-analyzed token.</param>
        private void AddIndex(IndexWriter writer, string title, string content, string date)
        {
            // Exceptions propagate unchanged. The previous catch/"throw ex" pair only
            // reset the stack trace without handling anything.
            Document doc = new Document();
            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("addtime", date, Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }

        /// <summary>
        /// Segments the keyword box content with both the PanGu analyzer and the
        /// Lucene standard analyzer and shows the two token lists.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btnCutWords_Click(object sender, EventArgs e)
        {
            string text = this.textBox2.Text;

            // cutWords closes the analyzer it is given, so each call gets a fresh instance.
            this.txtWords.Text = cutWords(text, PanGuAnalyzer);
            this.txtWords2.Text = cutWords(text, new StandardAnalyzer(LN.Util.Version.LUCENE_30));
        }

        /// <summary>
        /// Tokenizes <paramref name="words"/> with <paramref name="analyzer"/> and returns
        /// the terms joined as "term|term|...|".
        /// </summary>
        /// <param name="words">Text to segment.</param>
        /// <param name="analyzer">Analyzer to use; it is closed before this method returns.</param>
        /// <returns>Every term followed by '|', or an empty string when there is nothing to segment.</returns>
        private string cutWords(string words, Analyzer analyzer)
        {
            try
            {
                if (string.IsNullOrEmpty(words))
                {
                    return "";
                }

                StringBuilder result = new StringBuilder();

                using (StringReader reader = new StringReader(words))
                // The first argument is a field name, not the text; an empty name is enough here.
                using (TokenStream ts = analyzer.TokenStream(string.Empty, reader))
                {
                    // The attribute instance is reused for every token, so fetch it once.
                    LN.Analysis.Tokenattributes.ITermAttribute term =
                        ts.GetAttribute<LN.Analysis.Tokenattributes.ITermAttribute>();

                    while (ts.IncrementToken())
                    {
                        result.Append(term.Term).Append('|');
                    }
                }

                return result.ToString();
            }
            finally
            {
                analyzer.Close();
            }
        }

        /// <summary>
        /// Searches the "title" and "content" fields for <paramref name="searchKey"/> and
        /// appends the highlighted hits to <see cref="list"/>.
        /// </summary>
        /// <param name="searchKey">Raw keyword text typed by the user.</param>
        private void SearchIndex(string searchKey)
        {
            // An empty query string cannot be parsed, so treat it as "no results".
            if (searchKey == null || searchKey.Trim().Length == 0)
            {
                return;
            }

            string queryText = GetKeyWordsSplitBySpace(searchKey);
            if (queryText.Length == 0)
            {
                return;
            }

            string[] fields = { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(LN.Util.Version.LUCENE_30, fields, PanGuAnalyzer);
            Query query = parser.Parse(queryText);

            BooleanQuery bQuery = new BooleanQuery();
            // Explicit equivalent of the former "new Occur()" (the enum's zero value).
            bQuery.Add(query, Occur.MUST);

            // The highlighter segments its keyword argument itself, so hand it the raw
            // user text rather than the boosted query syntax ("word^9.0 ...").
            Dictionary<string, string> dic = new Dictionary<string, string>();
            dic.Add("title", searchKey);
            dic.Add("content", searchKey);

            GetSearchResult(bQuery, dic);
        }

        /// <summary>
        /// Runs <paramref name="bQuery"/> against the index, newest "addtime" first, and
        /// appends up to 100 highlighted hits to <see cref="list"/>.
        /// </summary>
        /// <param name="bQuery">Query to execute.</param>
        /// <param name="dicKeywords">Keywords to highlight, keyed by field name.</param>
        private void GetSearchResult(BooleanQuery bQuery, Dictionary<string, string> dicKeywords)
        {
            const int maxNum = 100;

            // Sort on the un-analyzed "addtime" string; the third argument "true" means descending.
            // "yyyy-MM-dd" values order chronologically when compared as strings.
            // (SortField.DOC would order by document number and ignore the field.)
            Sort sort = new Sort(new SortField("addtime", SortField.STRING, true));

            using (IndexSearcher searcher = new IndexSearcher(direcotry, true))
            {
                TopDocs docs = searcher.Search(bQuery, (Filter)null, maxNum, sort);
                if (docs == null)
                {
                    return;
                }

                // ScoreDocs holds only the hits actually returned, while TotalHits counts every match.
                foreach (ScoreDoc hit in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(hit.Doc);
                    Article model = new Article()
                    {
                        Title = doc.Get("title") ?? string.Empty,
                        Content = doc.Get("content") ?? string.Empty,
                        AddTime = doc.Get("addtime") ?? string.Empty
                    };
                    list.Add(SetHighlighter(dicKeywords, model));
                }
            }
        }

        /// <summary>
        /// Folder that stores the index files, located under the application start-up path.
        /// </summary>
        protected string IndexDic
        {
            get { return Application.StartupPath + "/IndexDic"; }
        }

        /// <summary>
        /// Index directory on disk. The folder is created on demand and the directory
        /// object is opened once and reused, instead of opening a new one on every access.
        /// </summary>
        public LN.Store.Directory direcotry
        {
            get
            {
                if (indexDirectory == null)
                {
                    // CreateDirectory is a no-op when the folder already exists.
                    System.IO.Directory.CreateDirectory(IndexDic);
                    indexDirectory = FSDirectory.Open(IndexDic);
                }

                return indexDirectory;
            }
        }

        /// <summary>
        /// Path of the PanGu configuration file, located under the application start-up path.
        /// </summary>
        protected string PanGuXmlPath
        {
            get { return Application.StartupPath + "/PanGu/PanGu.xml"; }
        }

        /// <summary>
        /// Returns a new PanGu analyzer on every access.
        /// </summary>
        protected Analyzer PanGuAnalyzer
        {
            get { return new PanGuAnalyzer(); }
        }

        /// <summary>
        /// Segments <paramref name="keywords"/> with PanGu and builds a query string of
        /// the form "word^boost.0 word^boost.0", where boost is 3 raised to the word's rank.
        /// </summary>
        /// <param name="keywords">Raw keyword text.</param>
        /// <returns>Space-separated boosted terms, or an empty string when no word is produced.</returns>
        private string GetKeyWordsSplitBySpace(string keywords)
        {
            PanGuTokenizer ktTokenizer = new PanGuTokenizer();
            StringBuilder result = new StringBuilder();
            ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);

            foreach (WordInfo word in words)
            {
                if (word == null || string.IsNullOrEmpty(word.Word))
                {
                    continue;
                }

                // Escape query-syntax characters so user input such as "a:b" or "(" cannot
                // break or alter the parsed query.
                result.AppendFormat("{0}^{1}.0 ", QueryParser.Escape(word.Word), (int)Math.Pow(3, word.Rank));
            }

            return result.ToString().Trim();
        }

        /// <summary>
        /// Wraps keyword matches in the title and content with a green font tag.
        /// </summary>
        /// <param name="dicKeywords">Keywords to highlight, keyed by "title" and "content".</param>
        /// <param name="model">Article whose title and content are highlighted in place.</param>
        /// <returns>The same <paramref name="model"/> instance.</returns>
        private Article SetHighlighter(Dictionary<string, string> dicKeywords, Article model)
        {
            SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"green\">", "</font>");
            Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 50;

            string strTitle;
            string strContent;
            dicKeywords.TryGetValue("title", out strTitle);
            dicKeywords.TryGetValue("content", out strContent);

            if (!string.IsNullOrEmpty(strTitle))
            {
                string fragment = highlighter.GetBestFragment(strTitle, model.Title);
                // Keep the original text when the highlighter returns nothing.
                if (!string.IsNullOrEmpty(fragment))
                {
                    model.Title = fragment;
                }
            }

            if (!string.IsNullOrEmpty(strContent))
            {
                string fragment = highlighter.GetBestFragment(strContent, model.Content);
                if (!string.IsNullOrEmpty(fragment))
                {
                    model.Content = fragment;
                }
            }

            return model;
        }

        /// <summary>
        /// Runs a search for the keyword box content and prints one line per hit.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            list.Clear();
            this.txtResult.Text = "";

            SearchIndex(this.textBox2.Text);

            if (list.Count == 0)
            {
                this.txtResult.Text = "没有查询到结果";
                return;
            }

            // Build the text once; appending to TextBox.Text inside the loop re-copies the
            // whole content for every hit.
            StringBuilder output = new StringBuilder();
            foreach (Article item in list)
            {
                output.Append("标题:").Append(item.Title)
                      .Append(" 内容:").Append(item.Content)
                      .Append(" 时间:").Append(item.AddTime)
                      .Append("\r\n");
            }

            this.txtResult.Text = output.ToString();
        }

        #region Delete index data (by id)

        /// <summary>
        /// Deletes every document whose "id" term equals <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Identifier to match against the "id" field.</param>
        /// <returns>
        /// true when the delete was committed; false when <paramref name="id"/> is null or empty.
        /// Index errors propagate as exceptions.
        /// </returns>
        public bool Delete(string id)
        {
            if (string.IsNullOrEmpty(id))
            {
                return false;
            }

            // NOTE(review): AddIndex in this file does not write an "id" field, so nothing
            // matches until documents are indexed with one - confirm against the indexing code.
            using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED))
            {
                writer.DeleteDocuments(new Term("id", id));
                writer.Commit();
            }

            return true;
        }

        #endregion

        #region Delete all index data

        /// <summary>
        /// Deletes every document in the index.
        /// </summary>
        /// <returns>true on success; false when the index could not be opened or modified.</returns>
        public bool DeleteAll()
        {
            try
            {
                using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                {
                    writer.DeleteAll();
                    writer.Commit();
                }

                return true;
            }
            catch (Exception ex)
            {
                // Best-effort API: report failure through the return value, but leave a trace
                // for diagnosis instead of discarding the error.
                Debug.WriteLine("DeleteAll failed: " + ex);
                return false;
            }
        }

        #endregion
    }

    /// <summary>
    /// Search result row shown in the result box.
    /// </summary>
    public class Article
    {
        /// <summary>Document identifier; not populated by the search code in this file.</summary>
        public string Id { set; get; }

        /// <summary>Title, possibly containing highlight markup.</summary>
        public string Title { set; get; }

        /// <summary>Content, possibly containing highlight markup.</summary>
        public string Content { set; get; }

        /// <summary>Stored "addtime" value of the document.</summary>
        public string AddTime { set; get; }
    }
}
View Code
实例下载地址:lucuneTest.zip
转载于:https://www.cnblogs.com/fj99/p/5513006.html
全文检索 使用最新lucene3.0.3+最新盘古分词 pangu2.4 .net 实例相关推荐
- 让盘古分词支持最新的Lucene.Net 3.0.3
原文:让盘古分词支持最新的Lucene.Net 3.0.3 好多年没升级过的Lucene.Net最近居然升级了,到了3.0.3后接口发生了很大变化,原来好多分词库都不能用了,所以上次我把MMSeg给修 ...
- 安卓最新系统_安卓最新10.0系统,新增功能都在这了!
熟悉安卓系统的差友们应该知道,谷歌喜欢用某一种点心来命名每一代的安卓系统. 从最开始安卓 1.5 的 Cupcake( 纸杯蛋糕 ),到最新 8.0 的 Oreo( 奥利奥 )和 9.0 Pie( 派 ...
- 【Cocos2d-X(2.x) 游戏开发系列之二】cocos2dx最新2.0.1版本跨平台整合NDK+Xcode编译到Android...
本站文章均为 李华明Himi 原创,转载务必在明显处注明: 转载自[黑米GameDev街区] 原文链接: http://www.himigame.com/hibernate/783.html ☞ 点击 ...
- Android 3.0 SDK 最新官方下载
Android 3.0 SDK 最新官方下载 Windows http://dl.google.com/android/android-sdk_r10-windows.zip http://dl.go ...
- Cartopy 0.20 最新功能 —— Cartopy 装不上别慌,内附解决方案
Cartopy 0.20 最新功能 背景介绍 Cartopy 是英国气象局开发的地图绘图包,实现了 Basemap 的大部分功能,利用了强大的PROJ.4.NumPy和Shapely库,并在Matpl ...
- red hat linux 9.0下载地址集合,Red Hat Linux 9.0 iso最新下载地址
Red Hat Linux 9.0 iso最新下载地址(注:用迅雷或Flashget同时下载三个iso,总的速度能够达到100k左右) 一.Red Hat Linux 9.0 iso下载 1.安装盘下 ...
- java对接海康威视SDK(win64、linux64),处理播放实时流转码,按时间回放功能,附海康威视最新3.0摄像头监控web端实例+插件
java对接海康威视SDK(win64.linux64),处理播放实时流转码,按时间回放功能 准备工作 遇到的坑 调用步骤 参数配置 DeviceEnums 初始化sdk 注册 大华的sdk叫登陆海康 ...
- php pdt,PDT(php开发环境) v3.2.0 官方最新正式版 - 爱win10
其他相关 PDT(php开发环境) v3.2.0 官方最新正式版 - 爱win102020年12月24日 爱win10收集的PDT是开源的PHP集成开发环境(IDE).PDT可为Eclipse平台提供 ...
- 解决 ubuntu 14.04下,eclipse adt-bundle-linux 闪退的问题,最新ADT-23.0.7
解决 ubuntu 14.04下,eclipse adt-bundle-linux 闪退的问题,最新ADT-23.0.7 今天 早上,早早的来到办公室,想把领导昨天布置的任务,快速完成.其实,就是很简 ...
最新文章
- mysql的常用内置函数
- 31.Linux/Unix 系统编程手册(上) -- 线程:线程安全和每线程存储
- Kotlin教程:Kotlin入门
- CAN网络管理Autosar(入门)
- VOA Special English Facebook Stock Goes on Sale (中英文对照)
- JavaScript-⑤代码
- 终面(HR面)_职业竞争力和职业规划
- 剖析kubernetes集群内部DNS解析原理
- eclipse中的servers不见了解决方法
- 测试单核cpu和多核cpu执行java多线程任务的效率
- 服务器日志法网站分析的原理及优缺点
- 灰色页面,HTML灰色页面
- 计算机无法信任的英文,关于信任的英语名言佳句语录
- 三相异步电机的平衡方程式
- intelliJ IDES MySql数据库JDBC连接代码
- 【测试表征】你想要的表征,这里全都有!(一)
- 【转载】ADB命令使用大全
- 舍不得卸载的5款宝藏APP,每款都是精品中的精品
- Laravel 接受Ajax的POST请求
- java 自己实现 解析处理user-agent 获取设备信息 ip-ua转化归因
热门文章
- java domain_为什么Java Bean被叫做domain类?
- win10计算机未连接到网络适配器,Win10网络适配器显示未连接怎么解决 - 系统之家...
- win10计算机未连接到网络适配器,win10系统没有线网络适配器显示未连接怎么处理?...
- Linux内核深度解析
- 外汇保证金交易理性的加仓方式
- 【图像处理】双眼去掩蔽
- 公众号给微信服务器响应数据
- informatica 遇到ORA-26002
- 如何:获取对 DTE 和 DTE2 对象的引用
- DTE和DCE的区别下:配置串口链路通信