参考了本文:http://www.cnblogs.com/xulb597/archive/2012/07/05/2578562.html

  • 支持模糊搜索,比如,【bkmh】可以匹配【BuKaManHua】;
  • 支持优先级,首字母、大写字母有更高的优先级。

亟需解决的问题:

  • 目前搜索结果与关键词中字母的顺序无关,即【buk】可以匹配【BKManHua】
  • 若条目中存在重复的任意关键字,即使不包含其他关键字,仍然能匹配上来
  • 内存的占用:因为接触C++不久,对内存管理一窍不通。一个1.65MB的文件(312964个单词),索引之后程序(VS2013编译,Release版本)占用的内存达到 68,860 KB。

trie.hpp

#ifndef TRIE
#define TRIE

// MatchInfo.hpp comes first so that MatchInfo is already declared when
// LinkedList.hpp is parsed.
#include "MatchInfo.hpp"
#include "LinkedList.hpp"

#include <stdlib.h>
#include <iostream>
#include <string>
#include <vector>

#define BRANCH_SIZE 28
#define START_CHAR 'a'
// Maps a character to its slot: 'a'..'z' -> 0..25, '*' -> 26, '?' -> 27.
// Any other character yields a value outside [0, BRANCH_SIZE) and must be
// rejected by the caller before it is used as an array index.
#define INDEX(x) ((x) == '?' ? 27 : ((x) == '*' ? 26 : (x) - START_CHAR))

//
// Character index used for fuzzy, priority-weighted keyword search.
//
// insert() records, for every indexable character of a word, which entry it
// belongs to and how much that occurrence is worth. fuzzy_match() turns a
// keyword into a per-entry score from those records.
//
// Matching ignores the order of the keyword's characters.
//
class Trie
{
public:
    Trie() : indexList(), rootNode(new Trie_Node(0)), nodeMap() {}

    ~Trie()
    {
        // Nodes are shared between parents (see getNode), so they are
        // released exactly once through nodeMap, never by walking next[].
        for (int i = 0; i < BRANCH_SIZE; ++i)
        {
            delete nodeMap[i];
            delete indexList[i];
        }
        delete rootNode;
    }

    // The object owns raw pointers; copying would free them twice.
    Trie(const Trie&) = delete;
    Trie& operator=(const Trie&) = delete;

    //
    // Indexes one word.
    // data: NUL-terminated word; characters that do not map to a slot are
    //       skipped, upper-case letters are folded to lower case.
    // i:    index of the entry the word belongs to (reported by fuzzy_match).
    //
    void insert(const char *data, const int i)
    {
        if (data == nullptr)
            return;

        bool seenFirstChar = false;
        Trie_Node *location = rootNode;
        int pos = 0;
        for (; *data; ++data, ++pos)
        {
            char c = *data;
            const bool isCapital = (c >= 'A' && c <= 'Z');
            if (isCapital)
                c += 32; // fold to lower case

            const int index = slotOf(c);
            if (index < 0)
                continue; // not an indexable character

            if (location->next[index] == nullptr)
                location->next[index] = getNode(index);
            location = location->next[index];

            MatchInfo *info = new MatchInfo();
            info->itemindex = i;
            info->position = pos; // position of the char in the string
            info->priority = 1;
            // The first indexed character and capital letters weigh more.
            if (!seenFirstChar)
            {
                seenFirstChar = true;
                info->priority++;
            }
            if (isCapital)
                info->priority++;

            if (indexList[index] == nullptr)
                indexList[index] = new LinkedList<MatchInfo>();
            indexList[index]->add(info); // the list takes ownership of info
        }
    }

    //
    // Scores every entry against a keyword.
    // data:           NUL-terminated keyword; characters that do not map to
    //                 a slot are ignored.
    // indexMap:       output array with one score per entry. -1 means the
    //                 entry cannot match; otherwise the value is the sum of
    //                 the priorities of all occurrences of the keyword's
    //                 characters in that entry.
    // indexMapLength: number of elements in indexMap.
    //
    void fuzzy_match(const char *data, int* indexMap, size_t indexMapLength)
    {
        if (indexMap == nullptr || indexMapLength == 0)
            return;
        if (data == nullptr)
            data = "";

        predicateIndexMap(data, indexMap, indexMapLength);

        for (; *data; ++data)
        {
            const int index = slotOf(*data);
            if (index < 0)
                continue;
            // A keyword character that was never indexed rules out every
            // entry, so there is nothing left to score.
            if (nodeMap[index] == nullptr)
                break;
            fillIndexArray(indexMap, indexMapLength, index);
        }
    }

private:
    struct Trie_Node
    {
        Trie_Node *next[BRANCH_SIZE];

        // The argument is accepted for compatibility and is not stored.
        explicit Trie_Node(int /*_index*/) : next() {}
    };

    //
    // One list per character slot recording every occurrence of that
    // character in the indexed words. Used for filtering and scoring.
    //
    LinkedList<MatchInfo>* indexList[BRANCH_SIZE];

    Trie_Node *rootNode;

    //
    // Holds every Trie_Node created so far, one per character slot.
    //
    Trie_Node *nodeMap[BRANCH_SIZE];

    //
    // Returns the slot of c, or -1 when c has no slot.
    //
    static int slotOf(char c)
    {
        const int index = INDEX(c);
        return (index >= 0 && index < BRANCH_SIZE) ? index : -1;
    }

    //
    // Returns the node for a slot, creating it on first use.
    // index: a valid slot in [0, BRANCH_SIZE).
    //
    Trie_Node *getNode(int index)
    {
        if (nodeMap[index] == nullptr)
            nodeMap[index] = new Trie_Node(index);
        return nodeMap[index];
    }

    //
    // Adds the priority of every occurrence of the character at [index] to
    // the score of its entry, skipping entries already marked -1.
    //
    void fillIndexArray(int* indexMap, size_t indexMapLength, int index)
    {
        LinkedList<MatchInfo> *list = indexList[index];
        if (list == nullptr)
            return;

        for (Node<MatchInfo> *node = list->getRoot(); node != nullptr; node = node->next)
        {
            const MatchInfo *info = node->value;
            if (info == nullptr)
                continue;
            if (info->itemindex < 0 || static_cast<size_t>(info->itemindex) >= indexMapLength)
                continue;
            if (indexMap[info->itemindex] != -1)
                indexMap[info->itemindex] += info->priority;
        }
    }

    //
    // Marks which entries can match the keyword: indexMap[i] becomes 0 for
    // entries that contain every keyword character at least as often as the
    // keyword does, and -1 for all others.
    //
    void predicateIndexMap(const char* keyword, int* indexMap, size_t indexMapLength)
    {
        // How often each slot occurs in the keyword.
        int required[BRANCH_SIZE] = {0};
        int distinct = 0;
        for (const char *p = keyword; *p; ++p)
        {
            const int charIndex = slotOf(*p);
            if (charIndex < 0)
                continue;
            if (required[charIndex]++ == 0)
                ++distinct;
        }

        std::vector<int> occurrences(indexMapLength, 0); // count for the slot in `countedFor`
        std::vector<int> countedFor(indexMapLength, -1); // slot `occurrences` refers to
        std::vector<int> satisfied(indexMapLength, 0);   // keyword slots fully covered

        for (int charIndex = 0; charIndex < BRANCH_SIZE; ++charIndex)
        {
            if (required[charIndex] == 0 || indexList[charIndex] == nullptr)
                continue;

            for (Node<MatchInfo> *node = indexList[charIndex]->getRoot(); node != nullptr; node = node->next)
            {
                if (node->value == nullptr)
                    continue;
                const int item = node->value->itemindex;
                if (item < 0 || static_cast<size_t>(item) >= indexMapLength)
                    continue;

                // Restart the per-entry counter when switching to a new slot.
                if (countedFor[item] != charIndex)
                {
                    countedFor[item] = charIndex;
                    occurrences[item] = 0;
                }
                if (++occurrences[item] == required[charIndex])
                    ++satisfied[item];
            }
        }

        for (size_t i = 0; i < indexMapLength; ++i)
            indexMap[i] = (distinct > 0 && satisfied[i] == distinct) ? 0 : -1;
    }
};
#endif // TRIE

View Code

LinkedList.hpp

#ifndef LINKEDLIST
#define LINKEDLIST

#include <stdlib.h>
#include <iostream>

//
// One element of a LinkedList. The node owns `value` and deletes it when the
// node is destroyed.
//
template <class T>
struct Node
{
    T* value;
    int index;
    Node *next;

    Node(T* _value, int _index) : value(_value), index(_index), next(nullptr) {}
    ~Node() { delete value; }

    // Copying would make two nodes delete the same value.
    Node(const Node&) = delete;
    Node& operator=(const Node&) = delete;
};

//
// Singly linked, append-only list of heap-allocated values.
//
// The root node always exists. While the list is empty its value is null;
// the first add() stores its value in the root instead of allocating a node.
//
template <class T>
class LinkedList
{
public:
    // Number of values added so far.
    int length;

    LinkedList() : length(0), root(new Node<T>(nullptr, 0)), current(root) {}

    ~LinkedList()
    {
        Node<T> *node = root;
        while (node != nullptr)
        {
            Node<T> *following = node->next;
            delete node;
            node = following;
        }
    }

    // The list owns its nodes; copying would free them twice.
    LinkedList(const LinkedList&) = delete;
    LinkedList& operator=(const LinkedList&) = delete;

    //
    // Appends a value. The list takes ownership of `value`.
    //
    void add(T *value)
    {
        if (length == 0)
        {
            root->value = value;
            root->index = 0;
        }
        else
        {
            current->next = new Node<T>(value, current->index + 1);
            current = current->next;
        }
        length++;
        current->next = nullptr;
    }

    //
    // Returns the node whose index equals `index`, or null when there is none.
    //
    Node<T> *getAt(int index)
    {
        for (Node<T> *node = root; node != nullptr; node = node->next)
        {
            if (node->index == index)
                return node;
        }
        return nullptr;
    }

    // First node of the list (never null).
    Node<T> *getRoot() { return root; }

    // Most recently filled node (the root while the list is empty).
    Node<T> *getCurrent() { return current; }

private:
    Node<T> *root, *current;
};
#endif // LINKEDLIST

View Code

MatchInfo.hpp

#ifndef DEFINE_MatchInfo
#define DEFINE_MatchInfo
//
// Match information for a single character of an indexed entry.
//
struct MatchInfo
{
    // Index of the entry the character belongs to.
    int itemindex = 0;
    // Position of the character inside the entry.
    int position = 0;
    // Priority (weight) of this occurrence.
    int priority = 0;
};
#endif

View Code

SortBiTree.hpp

#ifndef DEFINE_SortBiTree
#define DEFINE_SortBiTree

#include <stdlib.h>
#include <iostream>
#include <string>

//
// Node of a SortBiTree: a sort key (`index`) and the value stored under it.
//
template <class T>
struct BTNode
{
    int index;
    T value;
    BTNode<T> *left, *right;

    BTNode(int _i, T _v) : index(_i), value(_v), left(nullptr), right(nullptr) {}
};

//
// Unbalanced binary tree ordered by an integer key.
//
// Larger keys are stored to the LEFT and smaller or equal keys to the RIGHT,
// so visiting left, node, right yields keys in descending order.
//
template <class T>
class SortBiTree
{
public:
    SortBiTree() : root(nullptr), minNode(nullptr), maxNode(nullptr) {}

    ~SortBiTree() { clear(); }

    // The tree owns its nodes, so it can be moved but not copied.
    SortBiTree(const SortBiTree&) = delete;
    SortBiTree& operator=(const SortBiTree&) = delete;

    SortBiTree(SortBiTree&& other) noexcept
        : root(other.root), minNode(other.minNode), maxNode(other.maxNode)
    {
        other.root = other.minNode = other.maxNode = nullptr;
    }

    SortBiTree& operator=(SortBiTree&& other) noexcept
    {
        if (this != &other)
        {
            clear();
            root = other.root;
            minNode = other.minNode;
            maxNode = other.maxNode;
            other.root = other.minNode = other.maxNode = nullptr;
        }
        return *this;
    }

    //
    // Inserts `value` under the key `index`. Duplicate keys are allowed.
    // Iterative, so long runs of equal keys cannot exhaust the call stack.
    //
    void add(int index, T value)
    {
        BTNode<T> *fresh = new BTNode<T>(index, value);
        if (root == nullptr)
        {
            root = minNode = maxNode = fresh;
            return;
        }

        BTNode<T> *node = root;
        for (;;)
        {
            BTNode<T> *&child = (index > node->index) ? node->left : node->right;
            if (child == nullptr)
            {
                child = fresh;
                break;
            }
            node = child;
        }

        if (index > maxNode->index)
            maxNode = fresh;
        if (index < minNode->index)
            minNode = fresh;
    }

    // Node with the largest key, or null when the tree is empty.
    BTNode<T> *getMaxNode() { return maxNode; }

    // Root of the tree, or null when the tree is empty.
    BTNode<T> *getRootNode() { return root; }

private:
    BTNode<T> *root, *minNode, *maxNode;

    //
    // Frees every node without recursion: left children are rotated up
    // until the current node has none, then the node is deleted.
    //
    void clear()
    {
        BTNode<T> *node = root;
        while (node != nullptr)
        {
            if (node->left != nullptr)
            {
                BTNode<T> *pivot = node->left;
                node->left = pivot->right;
                pivot->right = node;
                node = pivot;
            }
            else
            {
                BTNode<T> *following = node->right;
                delete node;
                node = following;
            }
        }
        root = minNode = maxNode = nullptr;
    }
};
#endif

View Code

Stopwatch.hpp

#ifndef STOPWATCH_DEF
#define STOPWATCH_DEF
#include <ctime>

//
// One-shot timer based on clock().
//
// Ownership: Stop() deletes the object it is called on. A Stopwatch must
// therefore live on the heap (StartNew() or `new Stopwatch()`), Stop() must
// be called exactly once, and the pointer must not be used afterwards.
//
class Stopwatch
{
public:
    // Starts timing at construction so that Stop() never reads an
    // uninitialized start time.
    Stopwatch() : startTime(std::clock()) {}
    ~Stopwatch() {}

    //
    // Allocates a stopwatch and starts it. Release it by calling Stop().
    //
    static Stopwatch* StartNew()
    {
        Stopwatch* sw = new Stopwatch();
        sw->startTime = std::clock();
        return sw;
    }

    //
    // Returns the seconds of clock() time elapsed since the start and
    // destroys this stopwatch.
    //
    double Stop()
    {
        const double elapsed =
            static_cast<double>(std::clock() - startTime) / CLOCKS_PER_SEC;
        delete this;
        return elapsed;
    }

private:
    std::clock_t startTime;
};
#endif

View Code

main.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "Stopwatch.hpp"
#include "trie.hpp"
#include "SortBiTree.hpp"
using namespace std;

// Remaining number of results inorder_traverse may print. It stops printing
// when this reaches 0; main sets it to -1 to list everything.
int displayMaxItems = 10;

//
// Returns the size of the file at `path` in bytes, or (unsigned long)-1 when
// the file cannot be stat'ed.
//
unsigned long get_file_size(const char *path)
{
    struct stat statbuff;
    if (path == nullptr || stat(path, &statbuff) < 0)
        return static_cast<unsigned long>(-1);
    return static_cast<unsigned long>(statbuff.st_size);
}

//
// Reads the whole file into a newly allocated, NUL-terminated buffer.
// path: file to read.
// buff: receives the buffer, which the caller releases with delete[].
//       Set to null when the file cannot be read; an error is then
//       reported on stderr.
//
void readFromFile(const char* path, char** buff)
{
    if (buff == nullptr)
        return;
    *buff = nullptr;

    const unsigned long length = get_file_size(path);
    FILE *f = (length == static_cast<unsigned long>(-1)) ? nullptr : fopen(path, "r");
    if (f == nullptr)
    {
        std::cerr << "content file is invalid!" << std::endl;
        return;
    }

    // One extra byte for the terminator. Text mode may deliver fewer bytes
    // than the file size, so terminate at what was actually read.
    char *data = new char[length + 1];
    const size_t received = fread(data, sizeof(char), length, f);
    fclose(f);
    data[received] = '\0';
    *buff = data;
}

//
// Copies `source` into a newly allocated buffer.
// buff: receives the copy, which the caller releases with delete[].
//
void readInlineChars(const char* source, char** buff)
{
    *buff = new char[strlen(source) + 1]; // +1 for the terminating NUL
    strcpy(*buff, source);
}
// 用于显示结果
// TODO:使用迭代器
//
void inorder_traverse(BTNode<int> *node, char* words[], int* indexMap, int threshold) {if(displayMaxItems == 0)return;if (NULL != node->left) {inorder_traverse(node->left, words, indexMap, threshold);}int confidence = indexMap[node->value];if(confidence > threshold){printf("%i\t%i\t%s\n", node->value, confidence, words[node->value]);displayMaxItems--;}if (NULL != node->right) {inorder_traverse(node->right, words, indexMap, threshold);}
}void main(char* argv){Trie t;SortBiTree<int> bt;char* cpy = new char[];//=====================================// 注意,大文件请自行修改,去掉words//=====================================char *words[256];printf("indexing...");//readFromFile("contents.txt", &cpy);readFromFile("app_list.txt", &cpy);//readInlineChars("PlayShangDian PhotoshopDesigner pho Pho BuKaManHua BKManHua", &cpy);//readInlineChars("abc acc caa acb aaa abb", &cpy);//readInlineChars("aac aaa", &cpy);char *tk = strtok(cpy, " ");size_t index = 0;Stopwatch *sw = Stopwatch::StartNew();t.insert(tk, index++);while (tk = strtok(NULL, " ")){words[index] = tk;t.insert(tk, index++);}printf("%i word(s) have been indexed. [%lf seconds]\n", index, sw->Stop());
label_enter:printf("I'm searching for:\n>");string input;const char *chars;while (true){cin >> input;chars = input.data();if(!isalpha(*chars) || (*chars <= 'Z' && *chars >= 'A')){printf("only lowwer character is accepted!\n>");}elsebreak;}int *indexMap = new int[index];sw = Stopwatch::StartNew();printf("searching...");t.fuzzy_match(chars, indexMap, index);printf("done. [%lf seconds]\n", sw->Stop());
label_change:bt = SortBiTree<int>();printf("please input the threshold:\n>");int threshold = 0;cin >> threshold;printf("calculating...");int count = 0;for (int i = 0; i < index; i++){int confidence = indexMap[i];if(confidence > threshold){count++;bt.add(confidence, i);}}printf(" [%i] words matched.\n", count);displayMaxItems = 10;if(count > displayMaxItems)printf("first 10 items are listed below.\n");goto label_display;label_display:printf("------------------------------------------\n");printf("index\tpriority\tcontent\n");printf("------------------------------------------\n");if(bt.getRootNode() != NULL)inorder_traverse(bt.getRootNode(), words, indexMap, threshold);elseprintf("                none                      \n");/*it = &bt.getIterator();while (it->hasNext()){int i = it->next();int confidence = indexMap[i];if(confidence > threshold){printf("%s\t%i\t%i\n",words[i], confidence, i);}}*/printf("------------------------------------------\n");
label_menu:printf("now you may want to : \n[1].See them all.\n[2].Change thresgold\n[3].Change keyword.\n[q].Exit\n>");char choise = 0;cin >> choise;switch (choise){case '1':displayMaxItems = -1;goto label_display;break;case '2':goto label_change;break;case '3':goto label_enter;break;case 'q':break;default:break;}//delete indexMap;
}

View Code

截图:

转载于:https://www.cnblogs.com/ornithopter/p/3732496.html

Trie实现(C++)相关推荐

  1. BZOJ3166 [Heoi2013]Alo 【可持久化trie树 + 二分 + ST表】

    题目 Welcome to ALO ( Arithmetic and Logistic Online).这是一个VR MMORPG , 如名字所见,到处充满了数学的谜题. 现在你拥有n颗宝石,每颗宝石 ...

  2. usaco Cowxor (trie 树)

    没想到trie树还可以用在这上面,厉害厉害. [分析]这是字母树的经典应用.首先因为是求xor的最大值,可以用前缀和计算xor值,然后n^2枚举即可. [cpp] view plaincopy for ...

  3. 字符串匹配算法 -- AC自动机 基于Trie树的高效的敏感词过滤算法

    文章目录 1. 算法背景 2. AC自动机实现原理 2.1 构建失败指针 2.2 依赖失败指针过滤敏感词 3. 复杂度及完整代码 1. 算法背景 之前介绍过单模式串匹配的高效算法:BM和KMP 以及 ...

  4. 字符串匹配数据结构 --Trie树 高效实现搜索词提示 / IDE自动补全

    文章目录 1. 算法背景 2. Trie 树实现原理 2.1 Trie 树的构建 2.2 Trie树的查找 2.3 Trie树的遍历 2.4 Trie树的时间/空间复杂度 2.5 Trie 树 Vs ...

  5. POJ 2418 Hardwood Species(trie 树)

    题目链接 开始想用map的,字典序不会搞,还是老老实实的用trie树把.好久没写了,忘得差不多了. 1 #include <iostream> 2 #include <cstdio& ...

  6. Kanade's trio 2017多校#3 trie

    求数组中i<j<k 并且ai^aj<aj^ak的三元组组数 枚举插入ak,让ak中每一位作为最高位,查找字典树内最高位不同的数字数量 注意把ak的每个前缀做一个bad标记 存储让这个 ...

  7. [您有新的未分配科技点]可,可,可持久化!?------0-1Trie和可持久化Trie普及版讲解...

    这一次,我们来了解普通Trie树的变种:0-1Trie以及在其基础上产生的可持久化Trie(其实,普通的Trie也可以可持久化,只是不太常见) 先简单介绍一下0-1Trie:一个0-1Trie节点只有 ...

  8. 【bzoj3261】最大异或和 可持久化Trie树

    题目描述 给定一个非负整数序列 {a},初始长度为 N.        有M个操作,有以下两种操作类型: 1.A x:添加操作,表示在序列末尾添加一个数 x,序列的长度 N+1. 2.Q l r x: ...

  9. 算法 | 动画+解析,轻松理解「Trie树」

    Trie这个名字取自"retrieval",检索,因为Trie可以只用一个前缀便可以在一部字典中找到想要的单词. 虽然发音与「Tree」一致,但为了将这种 字典树 与 普通二叉树 ...

  10. 浅谈树形结构的特性和应用(上):多叉树,红黑树,堆,Trie树,B树,B+树......

    点击上方"方志朋",选择"设为星标" 回复"666"获取新整理的面试文章 上篇文章我们主要介绍了线性数据结构,本篇233酱带大家看看 无所不 ...

最新文章

  1. tomcat项目自动发布脚本.脚本运行效果
  2. 查看文件命令cat,more,less,tail,tac,nl,od---linux学习笔记
  3. 操作系统中,进程与线程怎么设计的?
  4. POJ-2386-Lake Counting
  5. 辣眼睛:程序员这样过儿童节
  6. Visual Studio 中Debug模式和Release模式的区别
  7. datalength,求字符串的字节数
  8. Facebook经典CTR预估模型
  9. android 刷rom,刷ROM是什么?刷ROM是什么意思?
  10. C语言骚操作:结构体初始化方法
  11. html动画加载效果,CSS3 实现 Loading(加载)动画效果
  12. python发送电子邮件
  13. 机器学习笔记(二十四):召回率、混淆矩阵
  14. 哈工大中文分词系统ltp4j使用总结
  15. 网站地图在线生成html,如何制作网站地图(sitemap.html和sitemap.xml)?
  16. 史上最搞笑的程序员段子,你看懂了吗?
  17. entrypoint size limit: The following entrypoint(s) combined asset size exceeds the recommended limit
  18. 多个计算机组成一个,怎么将两台计算机组成一个集群?
  19. 使用luckysheet实现excel导入导出
  20. 思科—计算机网络课程设计—第八章DHCP概念测试

热门文章

  1. 《Linux内核设计与实现》读书笔记(十六)- 页高速缓存和页回写
  2. 微型计算机与接口技术考试题,微机原理与接口技术试题库2
  3. 四线接近开关接线图_135张图!开关、电机、断路器、电热偶、电表接线图大全!...
  4. 智能指针——unique_ptr
  5. C++中如何初始化类中const或引用类型的数据成员?
  6. android 补间动画有停顿,Android动画原理分析(一)----补间动画
  7. mysql错误号码1040_Mysql ERROR 1040 (00000): Too many connections
  8. 手写单隐层神经网络_鸢尾花分类(matlab实现)
  9. [BUUCTF-pwn]——wustctf2020_getshell
  10. AKKA文档(java版)——准备开始