DirtyWordsFilter(脏字过滤)

前言
本文讲述如何使用 256 叉字典树（256tree：每个节点按字节值拥有 256 个子节点的 trie）过滤脏字，
提供 C++ 和 Golang 两个版本。
老早以前在 https://github.com/progtesttes 写的
这里稍微优化下

1：c++ code
dirtywords.h

#if !defined DIRTY_WORDS_H_
#define DIRTY_WORDS_H_
//#include<stdio.h>
#include<string.h>
/**
 * Singleton dirty-word filter backed by a 256-way trie keyed on byte values.
 *
 * The instance is obtained through GetInstance() and destroyed through
 * ReleaseByOwner(); the constructor and destructor are private, so it cannot
 * be created on the stack or deleted directly by callers.
 */
class CFilterDirtyWords
{
private:
    CFilterDirtyWords();
    ~CFilterDirtyWords();

    // A singleton must never be copied.
    CFilterDirtyWords(const CFilterDirtyWords&) = delete;
    CFilterDirtyWords& operator=(const CFilterDirtyWords&) = delete;

public:
    /** Returns the shared instance. */
    static CFilterDirtyWords* GetInstance();

    /**
     * Destroys this object.
     *
     * The static singleton pointer is cleared first so that a later
     * GetInstance() call does not hand out a pointer to freed memory.
     */
    void ReleaseByOwner()
    {
        if (pFilterDirtyWords == this) {
            pFilterDirtyWords = nullptr;
        }
        delete this;
    }

private:
    /** One trie node: 256 child slots (one per byte value) plus an end-of-word flag. */
    typedef struct _dirtytree {
        bool bend;                        // true when a dirty word ends at this node
        struct _dirtytree* subtree[256];  // child for each possible byte value

        // subtree() value-initializes every child slot to a null pointer.
        _dirtytree() : bend(false), subtree() {}
    } DIRTYTREE, *PDIRTYTREE;

    PDIRTYTREE m_phead;                            // root of the trie, null until a word list is loaded
    static CFilterDirtyWords* pFilterDirtyWords;   // the singleton instance

private:
    // Internal helpers operating on an explicit trie root.
    bool loaddirtywords(const char* filepath);
    bool hasdirtywords(const PDIRTYTREE pHead, const char *  pstring);
    void filterdirtywords(const PDIRTYTREE pHead, char * pstring);
    void insertdirtywords(PDIRTYTREE& pHead, const char *  pstring);
    void releasedirtytree(PDIRTYTREE pHead);

public:
    /** Loads the word list; a null path selects the default file name. */
    bool LoadDirtyFile(const char* filepath=nullptr);

    /** Returns true when lpstr contains at least one loaded dirty word. */
    bool HasDirtyWords(const char* lpstr);

    /** Overwrites dirty words inside pstring in place. */
    void FilterDirtyWords(char * pstring);
};
#endif

dirtywords.cpp

#include <stdio.h>
#include<ctype.h>
#include "dirtywords.h"
#define  CONFIG_DIRTY_WORDS   "dirtywords.txt"CFilterDirtyWords* CFilterDirtyWords::pFilterDirtyWords = NULL;CFilterDirtyWords::CFilterDirtyWords()
{m_phead = NULL;
}CFilterDirtyWords::~CFilterDirtyWords()
{releasedirtytree(m_phead);
}CFilterDirtyWords* CFilterDirtyWords::GetInstance()
{if(pFilterDirtyWords == NULL) {pFilterDirtyWords = new CFilterDirtyWords();}return pFilterDirtyWords;
}bool CFilterDirtyWords::LoadDirtyFile(const char* filepath)
{return loaddirtywords(filepath);
}bool CFilterDirtyWords::loaddirtywords(const char* filepath)
{FILE * f = fopen(filepath== nullptr? CONFIG_DIRTY_WORDS : filepath, "r");if (NULL == f) {return false;}char szbuf[256];PDIRTYTREE phead = NULL;while (NULL != fgets(szbuf, 256, f)) {insertdirtywords(phead, szbuf);}fclose(f);m_phead = phead;if (NULL == m_phead) {printf("CFilterDirtyWords::loaddirtywords is NULL"); return false;}return true;//   return m_phead?true:false ;
}
void CFilterDirtyWords::filterdirtywords(const PDIRTYTREE pHead, char * pstring)
{if (!pHead) return;PDIRTYTREE pTree = pHead;unsigned char ch = '\0';int pos = 0;char * pTemp = pstring;bool bBegin = false;while (*pTemp != '\0'){ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;if (pTree->subtree[ch]) {if (!bBegin) {bBegin = true;  pos = pTemp - pstring;}pTree = pTree->subtree[ch];if (pTree->bend) {while (pos <= pTemp - pstring) *(pstring + pos++) = '*';}}else if (bBegin && pHead->subtree[ch]) {pos = pTemp - pstring; pTree = pHead->subtree[ch];if (pTree->bend) {while (pos <= pTemp - pstring) *(pstring + pos++) = '*';}}else {pTree = pHead;  bBegin = false;}++pTemp;}
}
/**
 * Adds one word to the trie, creating the root on demand.
 *
 * @param pHead    root of the trie; allocated here when it is null and the
 *                 word is not empty.
 * @param pstring  the word; reading stops at NUL, '\r' or '\n', so a line
 *                 taken straight from fgets() can be passed in.
 *
 * Letters are folded with tolower() before insertion. A null or empty word
 * is ignored, so it neither allocates a root nor marks the root as a word end.
 */
void CFilterDirtyWords::insertdirtywords(PDIRTYTREE& pHead, const char *  pstring)
{
    if (!pstring) return;
    if (*pstring == '\0' || *pstring == '\r' || *pstring == '\n') return;

    if (!pHead) pHead = new DIRTYTREE;

    PDIRTYTREE node = pHead;
    for (const char* p = pstring; *p != '\0' && *p != '\r' && *p != '\n'; ++p) {
        // Cast to unsigned char first: the <ctype.h> functions are undefined
        // for negative values other than EOF.
        const unsigned char ch =
            static_cast<unsigned char>(tolower(static_cast<unsigned char>(*p)));
        if (!node->subtree[ch]) {
            node->subtree[ch] = new DIRTYTREE;
        }
        node = node->subtree[ch];
    }
    node->bend = true;
}
void CFilterDirtyWords::releasedirtytree(PDIRTYTREE pHead)
{if (!pHead) return;for (unsigned int i = 0; i< 256; i++) {releasedirtytree(pHead->subtree[i]);}delete pHead;
}bool CFilterDirtyWords::hasdirtywords(const PDIRTYTREE pHead, const char *  pstring)
{if (!pHead) return false;PDIRTYTREE pTree = pHead;unsigned char ch = '\0';char * pTemp = (char*)pstring;while (*pTemp != '\0'){ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;if (pTree->subtree[ch]) {pTree = pTree->subtree[ch];if (pTree->bend) {return true;}}else {pTree = pHead;}++pTemp;}return false;
}bool CFilterDirtyWords::HasDirtyWords(const char *  pstring)
{return hasdirtywords(m_phead, pstring);
}void CFilterDirtyWords::FilterDirtyWords(char * pstring)
{filterdirtywords(m_phead, pstring);
}

main.cpp

#include "dirtywords.h"
#include <stdio.h>
int main() {if (CFilterDirtyWords::GetInstance()->LoadDirtyFile()) {printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("123"));  //1printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("12"));   //0}CFilterDirtyWords::GetInstance()->ReleaseByOwner() ;return  0;
}/*
dirtywords.txt 内容如下
132
123
121
1221
1121
*/

运行结果

2：golang code
dirtyword.go

package ditywordimport ("bufio""io""log""os""strings"
)//256 tree
// dirtytree is one node of a 256-way trie: one child slot per byte value.
type dirtytree struct {
	bend    bool            // true when a dirty word ends at this node
	subtree [256]*dirtytree // child for each possible byte value
}

var (
	// dirtyhead is the root of the currently loaded trie; nil until a load succeeds.
	dirtyhead *dirtytree = nil
)

// loaddirtywords builds a new trie from filename (one word per line) and
// publishes it in dirtyhead. It returns false, leaving dirtyhead untouched,
// when the file cannot be opened or a read error other than EOF occurs.
func loaddirtywords(filename string) bool {
	fi, err := os.Open(filename)
	if err != nil {
		log.Printf("filename=%v Error: %s\n", filename, err)
		return false
	}
	defer fi.Close()

	phead := new(dirtytree)
	br := bufio.NewReader(fi)
	for {
		a, _, c := br.ReadLine()
		if c == io.EOF {
			break
		}
		if c != nil {
			// Any other error would repeat on every call; stop instead of spinning.
			log.Printf("filename=%v read error: %s\n", filename, c)
			return false
		}
		l := len(a)
		if l < 1 {
			continue
		}
		if l > 256 {
			a = a[:256]
		}
		insertdirtywords(phead, a)
	}
	dirtyhead = phead
	return true
}

// hasdirtywords reports whether str contains any word stored under phead.
// A trie walk is started at every byte offset, so a word is still found when
// it begins inside a failed partial match (e.g. "fyou" inside "ffyou").
// NOTE(review): strings.ToLower treats its input as UTF-8; confirm the
// behaviour is acceptable for callers that pass bytes in another encoding.
func hasdirtywords(phead *dirtytree, str string) bool {
	if phead == nil {
		return false
	}
	strlower := []byte(strings.ToLower(str))
	for start := range strlower {
		node := phead
		for _, ch := range strlower[start:] {
			node = node.subtree[ch]
			if node == nil {
				break
			}
			if node.bend {
				return true
			}
		}
	}
	return false
}

// filterdirtywords is not implemented yet.
//func filterdirtywords(phead *dirtytree,str string)  {
//}

// insertdirtywords adds the lower-cased word str to the trie rooted at phead.
// Empty words and a nil root are ignored: a root allocated here could not be
// handed back to the caller.
func insertdirtywords(phead *dirtytree, str []byte) {
	strlower := []byte(strings.ToLower(string(str)))
	if len(strlower) < 1 || phead == nil {
		return
	}
	pTree := phead
	for _, ch := range strlower {
		if pTree.subtree[ch] == nil {
			pTree.subtree[ch] = new(dirtytree)
		}
		// Always descend, whether the child was just created or already
		// existed; otherwise words sharing a prefix are stored on the wrong path.
		pTree = pTree.subtree[ch]
	}
	pTree.bend = true
}

// releasedirtytree is not implemented yet.
//func releasedirtytree(phead *dirtytree)  {
//}

//api
func LoadDirtyWordsFile(filename string) bool {return loaddirtywords(filename)
}func HasDirtyWords(chstr string) bool {return hasdirtywords(dirtyhead, chstr)
}//func FilterDirtyWords(filterstr string)  {//
//}

main.go

package mainimport ("bytes""dirtywords/dityword""fmt""github.com/henrylee2cn/mahonia""log""os""path""regexp""unicode/utf8"
)func check(src string) bool {str := "(?:')|(?:--)|(/\\*(?:.|[\\n\\r])*?\\*/)|(\b(select|update|and|or|delete|insert|trancate|char|chr|into|substr|ascii|declare|exec|count|master|into|drop|execute)\b)" //此处改为“re, err := regexp.Compile(str)if err != nil {fmt.Println(err.Error())return true}b := re.MatchString(src)fmt.Println("lllll", b) //打印出false。return b
}func main() {//1读取配置文件连cfgpath, _ := os.Getwd()filename := path.Join(cfgpath, "ditylist.txt")if !dityword.LoadDirtyWordsFile(filename) {os.Exit(1)}for {var input stringfmt.Scanln(&input)log.Printf("input=%v len=%v \n", input, len(input))if utf8.ValidString(input) {enc := mahonia.NewEncoder("gbk")gbkstr := enc.ConvertString(input)log.Printf("gbkstr=%v \n", []byte(gbkstr))b := dityword.HasDirtyWords(gbkstr)usrc := bytes.Runes([]byte(input))log.Printf("check b=%v uscr=%#v %v\n", b, usrc, len(usrc))//    2018/05/26 00:02:12 input=日 len=3//   2018/05/26 00:02:12 gbkstr=[200 213]// 2018/05/26 00:02:12 check b=true uscr=[]int32{26085} 1//r, size := utf8.DecodeRuneInString(input)//fmt.Printf("%c %v\n", r, size)//    newdata := string(([]byte(input))[size:])//    fmt.Printf("%c %v  data=%v \n", r, size,newdata)//str = str[size:]//    if data,num := utf8.DecodeRuneInString(input); ok {//      b := dityword.HasDirtyWords(input)//       fmt.Printf("check b=%v \n",b)//  }}}}/*
ditylist.txt 内容如下
fyou
fky
fyou1
*/

目录结构及运行结果

3：工程如有需要，后续上传
如果觉得有用，麻烦点个赞，加个收藏

DirtyWordsFilter(脏字过滤)相关推荐

关键词过滤（脏字过滤）Trie Tree(Hash)和FastCheck两种过滤方式java版本
以前在做关键词或脏字过滤的时候都是使用的TrieTree,后来随便搜索发现了yeerh的这篇文章:http://www.cnblogs.com/yeerh/archive/2011/10/20/221 ...
脏字过滤android代码,asp.NET 脏字过滤算法修改版
旧的算法是简单对每一个脏字调用一遍 string.replace,当然是用了StringBuilder.https://www.jb51.net/article/20575.htm.在我这里测试的时候 ...
php 脏字,高效.NET脏字过滤算法与应用实例
本文实例讲述了高效.NET脏字过滤算法.分享给大家供大家参考,具体如下: BadWordsFilter.cs类using System; using System.Collections.Generi ...
java脏字过滤_分享JavaWeb中filter过滤器的案例妙用 - 脏话过滤/编码过滤/代码过滤...
案例1. 利用Servlet的过滤器Filter进行完成脏话过滤 package cn.javabs.filter; import java.io.IOException; import javax. ...
java脏字过滤方法
public class DirtyFilter { private int rs = -1; public String[] dirtyArr = {};//脏字数组 public boolea ...
java脏字过滤_java 过滤脏字
public class DirtyFilter { private int rs = -1; public String[] dirtyArr = {};//脏字数组 public boolea ...
java脏字过滤_脏字过滤
1.[文件] SensitiveWordFilter.java ~ 7KB 下载(141) package com.forgov.sharpc.infrastruture.util; import s ...
NET脏字过滤算法收藏
方法一:使用正则表达式 1//脏字典数据存放文件路径 2 private static string FILE_NAME="zang.txt"; 3 ...
excel下拉列表数据筛选_从筛选的Excel列表中下拉
excel下拉列表数据筛选 Someone asked me how to make a data validation drop down that only shows the visible r ...

DirtyWordsFilter(脏字过滤)

DirtyWordsFilter(脏字过滤)相关推荐

最新文章

热门文章