项目场景:

提示:根据用户最近浏览的文章标题进行关键词摘要,并根据结果,查询ES

这里关键的是根据内容获取关键词摘要的方法


关键词摘要

使用IK中文分词

pom文件

 <!--中文分析器--><dependency><groupId>com.janeluo</groupId><artifactId>ikanalyzer</artifactId><version>2012_u6</version></dependency>

代码

package com.ruoyi.common.utils;import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;/*** @description:文本摘要提取文中重要的关键句子,使用top-n关键词在句子中的比例关系* @author: ycc* @since: 2022/12/13 15:07**/
@Component
public  class KeywordsSummary implements InitializingBean {@Value("${stopWord.path}")private String path;//保留关键词数量int N = 10;//关键词间的距离阀值int CLUSTER_THRESHOLD = 5;//前top-n句子int TOP_SENTENCES = 10;//最大边缘相关阀值double λ = 0.4;//句子得分使用方法final Set<String> styleSet = new HashSet<String>();//停用词列表Set<String> stopWords = new HashSet<String>();//句子编号及分词列表Map<Integer,List<String>> sentSegmentWords = null;/*** @description: 根据内容获取前N频率的关键词的权重比* @author: ycc* @date:  2022/12/13 17:29* @param: [title]* @return: java.util.List<java.lang.String>**/public List<String> getKeywordsList (String title){//利用IK分词组件将文本分词,返回分词列表List<String> words = this.IKSegment(title);//统计分词频率Map<String,Integer> wordsMap = new HashMap<String,Integer>();for(String word:words){Integer val = wordsMap.get(word);wordsMap.put(word,val == null ? 1: val + 1);}//使用优先队列自动排序Queue<Map.Entry<String, Integer>> wordsQueue=new PriorityQueue<Map.Entry<String,Integer>>(wordsMap.size(),new Comparator<Map.Entry<String,Integer>>(){//            @Overridepublic int compare(Entry<String, Integer> o1,Entry<String, Integer> o2) {return o2.getValue()-o1.getValue();}});wordsQueue.addAll(wordsMap.entrySet());if( N > wordsMap.size())N = wordsQueue.size();//取前N个频次最高的词存在wordsListList<String> wordsList = new ArrayList<String>(N);//top-n关键词Integer sum = 0;Map<String,Integer> map = new HashMap<>();Map<String,Double> resultMap = new HashMap<>();for(int i = 0;i < N;i++){Entry<String,Integer> entry= wordsQueue.poll();wordsList.add(entry.getKey());Integer value = entry.getValue();map.put(entry.getKey(),value);sum+=value;}for (Map.Entry<String,Integer> entry : map.entrySet()) {String key = entry.getKey();Integer value = entry.getValue();double f1 = new BigDecimal((float)value/sum).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();resultMap.put(key,f1);}return wordsList;}/*** 加载停用词* @param path*/private void loadStopWords(String path){BufferedReader br=null;try{InputStreamReader reader = new InputStreamReader(new 
FileInputStream(path),"utf-8");br = new BufferedReader(reader);String line=null;while((line=br.readLine())!=null){stopWords.add(line);}br.close();}catch(IOException e){e.printStackTrace();}}/*** @Author:ycc* @Description:利用正则将文本拆分成句子* @Date:*/private List<String> SplitSentences(String text){List<String> sentences = new ArrayList<String>();String regEx = "[!?。!?.]";Pattern p = Pattern.compile(regEx);String[] sents = p.split(text);Matcher m = p.matcher(text);int sentsLen = sents.length;if(sentsLen>0){  //把每个句子的末尾标点符号加上int index = 0;while(index < sentsLen){if(m.find()){sents[index] += m.group();}index++;}}for(String sentence:sents){//文章从网页上拷贝过来后遗留下来的没有处理掉的html的标志sentence=sentence.replaceAll("(&rdquo;|&ldquo;|&mdash;|&lsquo;|&rsquo;|&middot;|&quot;|&darr;|&bull;)", "");sentences.add(sentence.trim());}return sentences;}/*** 这里使用IK进行分词* @param text* @return*/private List<String> IKSegment(String text){List<String> wordList = new ArrayList<String>();Reader reader = new StringReader(text);IKSegmenter ik = new IKSegmenter(reader,true);Lexeme lex = null;try {while((lex=ik.next())!=null){String word=lex.getLexemeText();if(word.equals("nbsp") || this.stopWords.contains(word))continue;if(word.length()>1 && word!="\t")wordList.add(word);}} catch (IOException e) {e.printStackTrace();}return wordList;}/*** 每个句子得分  (keywordsLen*keywordsLen/totalWordsLen)* @param sentences 分句* @param topnWords keywords top-n关键词* @return*/private Map<Integer,Double> scoreSentences(List<String> sentences,List<String> topnWords){Map<Integer, Double> scoresMap=new LinkedHashMap<Integer,Double>();//句子编号,得分sentSegmentWords=new HashMap<Integer,List<String>>();int sentence_idx=-1;//句子编号for(String sentence:sentences){sentence_idx+=1;List<String> words=this.IKSegment(sentence);//对每个句子分词
//            List<String> words= HanLP.segment(sentence);sentSegmentWords.put(sentence_idx, words);List<Integer> word_idx=new ArrayList<Integer>();//每个关词键在本句子中的位置for(String word:topnWords){if(words.contains(word)){word_idx.add(words.indexOf(word));}elsecontinue;}if(word_idx.size()==0)continue;Collections.sort(word_idx);//对于两个连续的单词,利用单词位置索引,通过距离阀值计算一个族List<List<Integer>> clusters=new ArrayList<List<Integer>>();//根据本句中的关键词的距离存放多个词族List<Integer> cluster=new ArrayList<Integer>();cluster.add(word_idx.get(0));int i=1;while(i<word_idx.size()){if((word_idx.get(i)-word_idx.get(i-1))<this.CLUSTER_THRESHOLD)cluster.add(word_idx.get(i));else{clusters.add(cluster);cluster=new ArrayList<Integer>();cluster.add(word_idx.get(i));}i+=1;}clusters.add(cluster);//对每个词族打分,选择最高得分作为本句的得分double max_cluster_score=0.0;for(List<Integer> clu:clusters){int keywordsLen=clu.size();//关键词个数int totalWordsLen=clu.get(keywordsLen-1)-clu.get(0)+1;//总的词数double score=1.0*keywordsLen*keywordsLen/totalWordsLen;if(score>max_cluster_score)max_cluster_score=score;}scoresMap.put(sentence_idx,max_cluster_score);}return scoresMap;}/*** @Author:ycc* @Description:利用均值方差自动文摘* @Date:*/public String SummaryMeanstdTxt(String text){//将文本拆分成句子列表List<String> sentencesList = this.SplitSentences(text);//利用IK分词组件将文本分词,返回分词列表List<String> words = this.IKSegment(text);
//        List<Term> words1= HanLP.segment(text);//统计分词频率Map<String,Integer> wordsMap = new HashMap<String,Integer>();for(String word:words){Integer val = wordsMap.get(word);wordsMap.put(word,val == null ? 1: val + 1);}//使用优先队列自动排序Queue<Map.Entry<String, Integer>> wordsQueue=new PriorityQueue<Map.Entry<String,Integer>>(wordsMap.size(),new Comparator<Map.Entry<String,Integer>>(){//            @Overridepublic int compare(Entry<String, Integer> o1,Entry<String, Integer> o2) {return o2.getValue()-o1.getValue();}});wordsQueue.addAll(wordsMap.entrySet());if( N > wordsMap.size())N = wordsQueue.size();//取前N个频次最高的词存在wordsListList<String> wordsList = new ArrayList<String>(N);//top-n关键词for(int i = 0;i < N;i++){Entry<String,Integer> entry= wordsQueue.poll();wordsList.add(entry.getKey());}//利用频次关键字,给句子打分,并对打分后句子列表依据得分大小降序排序Map<Integer,Double> scoresLinkedMap = scoreSentences(sentencesList,wordsList);//返回的得分,从第一句开始,句子编号的自然顺序//approach1,利用均值和标准差过滤非重要句子Map<Integer,String> keySentence = new LinkedHashMap<Integer,String>();//句子得分均值double sentenceMean = 0.0;for(double value:scoresLinkedMap.values()){sentenceMean += value;}sentenceMean /= scoresLinkedMap.size();//句子得分标准差double sentenceStd=0.0;for(Double score:scoresLinkedMap.values()){sentenceStd += Math.pow((score-sentenceMean), 2);}sentenceStd = Math.sqrt(sentenceStd / scoresLinkedMap.size());for(Map.Entry<Integer, Double> entry:scoresLinkedMap.entrySet()){//过滤低分句子if(entry.getValue()>(sentenceMean+0.5*sentenceStd))keySentence.put(entry.getKey(), sentencesList.get(entry.getKey()));}StringBuilder sb = new StringBuilder();for(int  index:keySentence.keySet())sb.append(keySentence.get(index));return sb.toString();}/*** @Author:ycc* @Description:默认返回排序得分top-n句子* @Date:*/public String SummaryTopNTxt(String text){//将文本拆分成句子列表List<String> sentencesList = this.SplitSentences(text);//利用IK分词组件将文本分词,返回分词列表List<String> words = this.IKSegment(text);
//        List<Term> words1= HanLP.segment(text);//统计分词频率Map<String,Integer> wordsMap = new HashMap<String,Integer>();for(String word:words){Integer val = wordsMap.get(word);wordsMap.put(word,val == null ? 1: val + 1);}//使用优先队列自动排序Queue<Map.Entry<String, Integer>> wordsQueue=new PriorityQueue<Map.Entry<String,Integer>>(wordsMap.size(),new Comparator<Map.Entry<String,Integer>>(){//            @Overridepublic int compare(Entry<String, Integer> o1,Entry<String, Integer> o2) {return o2.getValue()-o1.getValue();}});wordsQueue.addAll(wordsMap.entrySet());if( N > wordsMap.size())N = wordsQueue.size();//取前N个频次最高的词存在wordsListList<String> wordsList = new ArrayList<String>(N);//top-n关键词for(int i = 0;i < N;i++){Entry<String,Integer> entry= wordsQueue.poll();wordsList.add(entry.getKey());}//利用频次关键字,给句子打分,并对打分后句子列表依据得分大小降序排序Map<Integer,Double> scoresLinkedMap = scoreSentences(sentencesList,wordsList);//返回的得分,从第一句开始,句子编号的自然顺序List<Map.Entry<Integer, Double>> sortedSentList = new ArrayList<Map.Entry<Integer,Double>>(scoresLinkedMap.entrySet());//按得分从高到底排序好的句子,句子编号与得分//System.setProperty("java.util.Arrays.useLegacyMergeSort", "true");Collections.sort(sortedSentList, new Comparator<Map.Entry<Integer, Double>>(){//            @Overridepublic int compare(Entry<Integer, Double> o1,Entry<Integer, Double> o2) {return o2.getValue() == o1.getValue() ? 0 :(o2.getValue() > o1.getValue() ? 
1 : -1);}});//approach2,默认返回排序得分top-n句子Map<Integer,String> keySentence = new TreeMap<Integer,String>();int count = 0;for(Map.Entry<Integer, Double> entry:sortedSentList){count++;keySentence.put(entry.getKey(), sentencesList.get(entry.getKey()));if(count == this.TOP_SENTENCES)break;}StringBuilder sb=new StringBuilder();for(int  index:keySentence.keySet())sb.append(keySentence.get(index));return sb.toString();}/*** @Author:ycc* @Description:利用最大边缘相关自动文摘* @Date:*/public String SummaryMMRNTxt(String text){//将文本拆分成句子列表List<String> sentencesList = this.SplitSentences(text);//利用IK分词组件将文本分词,返回分词列表List<String> words = this.IKSegment(text);
//        List<Term> words1= HanLP.segment(text);//统计分词频率Map<String,Integer> wordsMap = new HashMap<String,Integer>();for(String word:words){Integer val = wordsMap.get(word);wordsMap.put(word,val == null ? 1: val + 1);}//使用优先队列自动排序Queue<Map.Entry<String, Integer>> wordsQueue=new PriorityQueue<Map.Entry<String,Integer>>(wordsMap.size(),new Comparator<Map.Entry<String,Integer>>(){//            @Overridepublic int compare(Entry<String, Integer> o1,Entry<String, Integer> o2) {return o2.getValue()-o1.getValue();}});wordsQueue.addAll(wordsMap.entrySet());if( N > wordsMap.size())N = wordsQueue.size();//取前N个频次最高的词存在wordsListList<String> wordsList = new ArrayList<String>(N);//top-n关键词for(int i = 0;i < N;i++){Entry<String,Integer> entry= wordsQueue.poll();wordsList.add(entry.getKey());}//利用频次关键字,给句子打分,并对打分后句子列表依据得分大小降序排序Map<Integer,Double> scoresLinkedMap = scoreSentences(sentencesList,wordsList);//返回的得分,从第一句开始,句子编号的自然顺序List<Map.Entry<Integer, Double>> sortedSentList = new ArrayList<Map.Entry<Integer,Double>>(scoresLinkedMap.entrySet());//按得分从高到底排序好的句子,句子编号与得分//System.setProperty("java.util.Arrays.useLegacyMergeSort", "true");Collections.sort(sortedSentList, new Comparator<Map.Entry<Integer, Double>>(){//            @Overridepublic int compare(Entry<Integer, Double> o1,Entry<Integer, Double> o2) {return o2.getValue() == o1.getValue() ? 0 :(o2.getValue() > o1.getValue() ? 
1 : -1);}});//approach3,利用最大边缘相关,返回前top-n句子if(sentencesList.size()==2){return sentencesList.get(0)+sentencesList.get(1);}else if(sentencesList.size()==1)return sentencesList.get(0);Map<Integer,String> keySentence = new TreeMap<Integer,String>();int count = 0;Map<Integer,Double> MMR_SentScore = MMR(sortedSentList);for(Map.Entry<Integer, Double> entry:MMR_SentScore.entrySet()){count++;int sentIndex=entry.getKey();String sentence=sentencesList.get(sentIndex);keySentence.put(sentIndex, sentence);if(count==this.TOP_SENTENCES)break;}StringBuilder sb=new StringBuilder();for(int  index:keySentence.keySet())sb.append(keySentence.get(index));return sb.toString();}/*** 计算文本摘要* @param text* @param style(meanstd,default,MMR)* @return*/public String summarize(String text,String style){try {if(!styleSet.contains(style) || text.trim().equals(""))throw new IllegalArgumentException("方法 summarize(String text,String style)中text不能为空,style必须是meanstd、default或者MMR");} catch (Exception e) {e.printStackTrace();System.exit(1);}//将文本拆分成句子列表List<String> sentencesList = this.SplitSentences(text);//利用IK分词组件将文本分词,返回分词列表List<String> words = this.IKSegment(text);
//        List<Term> words1= HanLP.segment(text);//统计分词频率Map<String,Integer> wordsMap = new HashMap<String,Integer>();for(String word:words){Integer val = wordsMap.get(word);wordsMap.put(word,val == null ? 1: val + 1);}//使用优先队列自动排序Queue<Map.Entry<String, Integer>> wordsQueue=new PriorityQueue<Map.Entry<String,Integer>>(wordsMap.size(),new Comparator<Map.Entry<String,Integer>>(){//            @Overridepublic int compare(Entry<String, Integer> o1,Entry<String, Integer> o2) {return o2.getValue()-o1.getValue();}});wordsQueue.addAll(wordsMap.entrySet());if( N > wordsMap.size())N = wordsQueue.size();//取前N个频次最高的词存在wordsListList<String> wordsList = new ArrayList<String>(N);//top-n关键词for(int i = 0;i < N;i++){Entry<String,Integer> entry= wordsQueue.poll();wordsList.add(entry.getKey());}for (String s : wordsList) {System.out.println("summarize:"+s);}//利用频次关键字,给句子打分,并对打分后句子列表依据得分大小降序排序Map<Integer,Double> scoresLinkedMap = scoreSentences(sentencesList,wordsList);//返回的得分,从第一句开始,句子编号的自然顺序Map<Integer,String> keySentence=null;//approach1,利用均值和标准差过滤非重要句子if(style.equals("meanstd")){keySentence = new LinkedHashMap<Integer,String>();//句子得分均值double sentenceMean = 0.0;for(double value:scoresLinkedMap.values()){sentenceMean += value;}sentenceMean /= scoresLinkedMap.size();//句子得分标准差double sentenceStd=0.0;for(Double score:scoresLinkedMap.values()){sentenceStd += Math.pow((score-sentenceMean), 2);}sentenceStd = Math.sqrt(sentenceStd / scoresLinkedMap.size());for(Map.Entry<Integer, Double> entry:scoresLinkedMap.entrySet()){//过滤低分句子if(entry.getValue()>(sentenceMean+0.5*sentenceStd))keySentence.put(entry.getKey(), sentencesList.get(entry.getKey()));}}List<Map.Entry<Integer, Double>> sortedSentList = new ArrayList<Map.Entry<Integer,Double>>(scoresLinkedMap.entrySet());//按得分从高到底排序好的句子,句子编号与得分//System.setProperty("java.util.Arrays.useLegacyMergeSort", "true");Collections.sort(sortedSentList, new Comparator<Map.Entry<Integer, Double>>(){//            @Overridepublic int compare(Entry<Integer, 
Double> o1,Entry<Integer, Double> o2) {return o2.getValue() == o1.getValue() ? 0 :(o2.getValue() > o1.getValue() ? 1 : -1);}});//approach2,默认返回排序得分top-n句子if(style.equals("default")){keySentence = new TreeMap<Integer,String>();int count = 0;for(Map.Entry<Integer, Double> entry:sortedSentList){count++;keySentence.put(entry.getKey(), sentencesList.get(entry.getKey()));if(count == this.TOP_SENTENCES)break;}}//approach3,利用最大边缘相关,返回前top-n句子if(style.equals("MMR")){if(sentencesList.size()==2){return sentencesList.get(0)+sentencesList.get(1);}else if(sentencesList.size()==1)return sentencesList.get(0);keySentence = new TreeMap<Integer,String>();int count = 0;Map<Integer,Double> MMR_SentScore = MMR(sortedSentList);for(Map.Entry<Integer, Double> entry:MMR_SentScore.entrySet()){count++;int sentIndex=entry.getKey();String sentence=sentencesList.get(sentIndex);keySentence.put(sentIndex, sentence);if(count==this.TOP_SENTENCES)break;}}StringBuilder sb=new StringBuilder();for(int  index:keySentence.keySet())sb.append(keySentence.get(index));//System.out.println("summarize out...");return sb.toString();}/*** 最大边缘相关(Maximal Marginal Relevance),根据λ调节准确性和多样性* max[λ*score(i) - (1-λ)*max[similarity(i,j)]]:score(i)句子的得分,similarity(i,j)句子i与j的相似度* User-tunable diversity through λ parameter* - High λ= Higher accuracy* - Low λ= Higher diversity* @param sortedSentList 排好序的句子,编号及得分* @return*/private Map<Integer,Double> MMR(List<Map.Entry<Integer, Double>> sortedSentList){//System.out.println("MMR In...");double[][] simSentArray=sentJSimilarity();//所有句子的相似度Map<Integer,Double> sortedLinkedSent=new LinkedHashMap<Integer,Double>();for(Map.Entry<Integer, Double> entry:sortedSentList){sortedLinkedSent.put(entry.getKey(),entry.getValue());}Map<Integer,Double> MMR_SentScore=new LinkedHashMap<Integer,Double>();//最终的得分(句子编号与得分)Map.Entry<Integer, Double> Entry=sortedSentList.get(0);//第一步先将最高分的句子加入MMR_SentScore.put(Entry.getKey(), Entry.getValue());boolean flag=true;while(flag){int index=0;double 
maxScore=Double.NEGATIVE_INFINITY;//通过迭代计算获得最高分句子for(Map.Entry<Integer, Double> entry:sortedLinkedSent.entrySet()){if(MMR_SentScore.containsKey(entry.getKey())) continue;double simSentence=0.0;for(Map.Entry<Integer, Double> MMREntry:MMR_SentScore.entrySet()){//这个是获得最相似的那个句子的最大相似值double simSen=0.0;if(entry.getKey()>MMREntry.getKey())simSen=simSentArray[MMREntry.getKey()][entry.getKey()];elsesimSen=simSentArray[entry.getKey()][MMREntry.getKey()];if(simSen>simSentence){simSentence=simSen;}}simSentence=λ*entry.getValue()-(1-λ)*simSentence;if(simSentence>maxScore){maxScore=simSentence;index=entry.getKey();//句子编号}}MMR_SentScore.put(index, maxScore);if(MMR_SentScore.size()==sortedLinkedSent.size())flag=false;}//System.out.println("MMR out...");return MMR_SentScore;}/*** 每个句子的相似度,这里使用简单的jaccard方法,计算所有句子的两两相似度* @return*/private double[][] sentJSimilarity(){//System.out.println("sentJSimilarity in...");int size=sentSegmentWords.size();double[][] simSent=new double[size][size];for(Map.Entry<Integer, List<String>> entry:sentSegmentWords.entrySet()){for(Map.Entry<Integer, List<String>> entry1:sentSegmentWords.entrySet()){if(entry.getKey()>=entry1.getKey()) continue;int commonWords=0;double sim=0.0;for(String entryStr:entry.getValue()){if(entry1.getValue().contains(entryStr))commonWords++;}sim=1.0*commonWords/(entry.getValue().size()+entry1.getValue().size()-commonWords);simSent[entry.getKey()][entry1.getKey()]=sim;}}//System.out.println("sentJSimilarity out...");return simSent;}@Overridepublic void afterPropertiesSet() throws Exception {this.loadStopWords(this.path);this.styleSet.add("meanstd");this.styleSet.add("default");this.styleSet.add("MMR");}/*    public static void main(String[] args){KeywordsSummary summary=new KeywordsSummary();*//*String text="我国古代历史演义小说的代表作。明代小说家罗贯中依据有关三国的历史、杂记,在广泛吸取民间传说和民间艺人创作成果的基础上,加工、再创作了这部长篇章回小说。" +"作品写的是汉末到晋初这一历史时期魏、蜀、吴三个封建统治集团间政治、军事、外交等各方面的复杂斗争。通过这些描写,揭露了社会的黑暗与腐朽,谴责了统治阶级的残暴与奸诈," 
+"反映了人民在动乱时代的苦难和明君仁政的愿望。小说也反映了作者对农民起义的偏见,以及因果报应和宿命论等思想。战争描写是《三国演义》突出的艺术成就。" +"这部小说通过惊心动魄的军事、政治斗争,运用夸张、对比、烘托、渲染等艺术手法,成功地塑造了诸葛亮、曹操、关羽、张飞等一批鲜明、生动的人物形象。" +"《三国演义》结构宏伟而又严密精巧,语言简洁、明快、生动。有的评论认为这部作品在艺术上的不足之处是人物性格缺乏发展变化,有的人物渲染夸张过分导致失真。" +"《三国演义》标志着历史演义小说的辉煌成就。在传播政治、军事斗争经验、推动历史演义创作的繁荣等方面都起过积极作用。" +"《三国演义》的版本主要有明嘉靖刻本《三国志通俗演义》和清毛宗岗增删评点的《三国志演义》"*//*;String text = "河南省农业农村厅关于印发《河南省高标准农田建设质量年行动方案》的通知,河南省农业农村厅关于参加2021年河南省省直事业单位公开招聘工作方案,全国农民合作社质量提升整县推进试点单位公布 河南省10地入选试点" +"全国农民合作社质量提升整县推进试点单位公布 河南省10地入选试点";String keySentences=summary.SummaryMeanstdTxt(text);//System.out.println("MEANSTD: "+keySentences);String mmrSentences=summary.SummaryMMRNTxt(text);//System.out.println("+MMR: "+mmrSentences);String topSentences=summary.SummaryTopNTxt(text);//System.out.println("TOP: "+topSentences);}*/
}

ES查询部分:

这里使用的是可以分词的查询
matchQuery、multiMatchQuery

//1.创建 SearchRequest搜索请求,并指定要查询的索引SearchRequest searchRequest = new SearchRequest("titles");//2.创建 SearchSourceBuilder条件构造。SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();//分页if (pageNo > 1) {searchSourceBuilder.from((pageNo - 1) * pageSize);}searchSourceBuilder.size(pageSize);/*//match 查找 (该查询自动分词匹配 )MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title_name", toString);//operator:设置查询的结果取交集还是并集,并集用 or, 交集用 andmatchQueryBuilder.operator(Operator.OR);searchSourceBuilder.query(matchQueryBuilder);*///MultiQuery可以通过 fields 属性来设置多个域联合查找searchSourceBuilder.query(QueryBuilders.multiMatchQuery(toString, "title_name").minimumShouldMatch("20%").field("title_name", 10));//3.将 SearchSourceBuilder 添加到 SearchRequest中searchRequest.source(searchSourceBuilder);//4.执行查询SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);//5.解析查询结果System.out.println("花费的时长:" + searchResponse.getTook());SearchHits hits = searchResponse.getHits();//System.out.println("符合条件的总文档数量:" + hits.getTotalHits());for (SearchHit hit : hits) {String sourceAsString = hit.getSourceAsString();//System.out.println("文档原生信息:" + sourceAsString);TitleTable titleTable = JSON.parseObject(sourceAsString, TitleTable.class);System.out.println("ID: " + titleTable.getId() + "  标题: " + titleTable.getTitleName());String id = titleTable.getId();List<ContentTable> contentTables = contentTableMapper.selectContentTableListByTitleId(id);listVo.addAll(contentTables);}

具体的Java查询ES推荐文章链接:
Java操作es之各种高级查询

Java查询ES (elasticsearch) 对短句进行关键词摘要 并分词查询相关推荐

  1. 【Elasticsearch】 es ElasticSearch集群故障案例分析: 警惕通配符查询 Wildcard

    1.概述 转载:https://elasticsearch.cn/article/171 许多有RDBMS/SQL背景的开发者,在初次踏入ElasticSearch世界的时候,很容易就想到使用(Wil ...

  2. JAVA框架09 -- Elasticsearch

    Elasticsearch介绍和安装 用户访问我们的首页,一般都会直接搜索来寻找自己想要购买的商品. 而商品的数量非常多,而且分类繁杂.如果能正确的显示出用户想要的商品,并进行合理的过滤,尽快促成交易 ...

  3. Elasticsearch教程(4) High Level REST Client API 查询 聚合 分组

    ES High Level REST Client API 查询 聚合 1 准备数据 1.1 插入测试数据 2 Maven引入ES 3 创建Client 4 查询API 4.1 根据id查询单条数据 ...

  4. es elasticsearch 几种常见查询场景 二次分组 java读取es的查询json文件

    大家好,我是烤鸭: es中几种常见的查询场景,使用java读取es的json文件进行查询. es 中文使用手册. https://www.elastic.co/guide/cn/elasticsear ...

  5. es java match_java操作elasticsearch实现条件查询(match、multiMatch、term、terms、reange)...

    1.条件match query查询 //条件查询match query @Testpublic void test10() throwsUnknownHostException {//1.指定es集群 ...

  6. 【Elasticsearch】es 模糊查询导致Elasticsearch服务宕机

    本文为博主九师兄(QQ:541711153 欢迎来探讨技术)原创文章,未经允许博主不允许转载. 可以加我问问题,免费解答,有问题可以先私聊我,本人每天都在线,会帮助需要的人. 但是本博主因为某些原因, ...

  7. SpringBoot整合elasticsearch (java整合es)

    欢迎大家进群,一起探讨学习 微信公众号,每天给大家提供技术干货 博主技术笔记 博主网站地址1 博主网站地址2 博主开源微服架构前后端分离技术博客项目源码地址,欢迎各位star SpringBoot整合 ...

  8. java使用es查询

    众所周知,elasticsearch简称es,它是基于基于Lucene的搜索服务器.它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口.Elasticsearch是用Java开 ...

  9. ElasticSearch之——Java操作ES实例(基于ES-2.3.0)

    转载请注明出处:http://blog.csdn.net/l1028386804/article/details/78758691 今天,我们就来看看如何利用Java API来操作ES的数据,这里不讲 ...

最新文章

  1. 下一代Asp.net开发规范OWIN(2)—— Katana介绍以及使用
  2. 不是《归去来》是《难念的经》
  3. 继续VISTA下网卡驱动问题
  4. insert在python中的用法_python中insert用法是什么_后端开发
  5. AndroidのActivity之退出返回栈(二)
  6. C++中关于内部与外部函数
  7. jquery获取html代码怎么写,jQuery从html代码中获取对应标签的写法
  8. presentViewController和pushViewController
  9. 记录一次react项目配置过程
  10. Android TensorFlow Lite 深度学习识别手写数字mnist demo
  11. Lynis 检测自身安全漏洞工具(本机)
  12. 奇异值分解(SVD) 的 几何意义
  13. a letter and a number
  14. 守护安全|AIRIOT城市天然气综合管理解决方案
  15. 关系数据库范式(1NF,2NF,3NF,BCNF,4NF,5NF)全解析
  16. 纯CSS制作3D动态相册【流星雨3D旋转相册】HTML+CSS+JavaScriptHTML5七夕情人节表白网页制作
  17. jQuery取值和赋值的基本方法
  18. react脚手架创建项目报错,ReactDOM.render is no longer supported in React 18.
  19. 局域网访问提示无法访问检查拼写_我的电脑是win10系统,局域网共享打印机,提示windows无法访问,请检查名称的拼写.否则网络可能有问题,...
  20. 用Java描述世界:数据

热门文章

  1. OPPO R9s领衔,情人节哪些手机适合作为情侣手机送给对方
  2. 跟人聊天 VS 跟机器聊天,你更爱哪个?
  3. 未来的计算机手抄报,关于未来科技手抄报图片
  4. Python入门基础知识函数式编程之匿名函数
  5. 活动运营自动化平台实践
  6. 【30天学会接口自动化测试】接口自动化测试之实际项目做接口测试(6)
  7. python多态_Python 多态与抽象类
  8. Python多态的两种实现形式
  9. graphviz java api_Graphviz 简易教程
  10. javaSE基础知识笔记(四)