DirtyWordsFilter(脏字过滤)
前言
本文讲述如何使用 256 叉树(256 tree,即每个节点按字节值拥有 256 个分支的字典树)过滤脏字,
提供 C++ 和 Golang 两个版本。
老早以前在 https://github.com/progtesttes 写的
这里稍微优化下
1:c++ code
dirtywords.h
#if !defined DIRTY_WORDS_H_
#define DIRTY_WORDS_H_
//#include<stdio.h>
#include<string.h>
/// Singleton dirty-word filter backed by a 256-way trie (one child per byte value).
///
/// Obtain the instance with GetInstance(), load a word list with LoadDirtyFile(),
/// then query with HasDirtyWords() or mask text with FilterDirtyWords(). The
/// instance is destroyed with ReleaseByOwner().
class CFilterDirtyWords
{
private:
    CFilterDirtyWords();
    ~CFilterDirtyWords();

    // The object owns its trie through the raw pointer m_phead, so a copy would
    // share that pointer and release it a second time. Copying is disabled.
    CFilterDirtyWords(const CFilterDirtyWords&) = delete;
    CFilterDirtyWords& operator=(const CFilterDirtyWords&) = delete;

public:
    /// Returns the shared instance (its definition creates it on first use).
    static CFilterDirtyWords* GetInstance();

    /// Destroys this object. The static instance pointer is cleared first so a
    /// later GetInstance() cannot hand out a pointer to freed memory.
    void ReleaseByOwner()
    {
        if (pFilterDirtyWords == this) {
            pFilterDirtyWords = nullptr;
        }
        delete this;
    }

private:
    /// One trie node.
    typedef struct _dirtytree
    {
        bool bend;                       ///< true when a word ends at this node
        struct _dirtytree * subtree[256]; ///< children, indexed by byte value

        _dirtytree() : bend(false), subtree() {} // subtree() nulls every slot
    } DIRTYTREE, *PDIRTYTREE;

    PDIRTYTREE m_phead;                           ///< root of the loaded trie, or null
    static CFilterDirtyWords* pFilterDirtyWords;  ///< the shared instance, or null

private:
    bool loaddirtywords(const char* filepath);
    bool hasdirtywords(const PDIRTYTREE pHead, const char * pstring);
    void filterdirtywords(const PDIRTYTREE pHead, char * pstring);
    void insertdirtywords(PDIRTYTREE& pHead, const char * pstring);
    void releasedirtytree(PDIRTYTREE pHead);

public:
    /// Loads the word list from filepath; a null path selects the default file.
    /// @return true when a trie was built.
    bool LoadDirtyFile(const char* filepath=nullptr);

    /// @return true when lpstr contains at least one loaded word.
    bool HasDirtyWords(const char* lpstr);

    /// Masks loaded words inside pstring in place.
    void FilterDirtyWords(char * pstring);
};
#endif
dirtywords.cpp
#include <stdio.h>
#include<ctype.h>
#include "dirtywords.h"
#define CONFIG_DIRTY_WORDS "dirtywords.txt"CFilterDirtyWords* CFilterDirtyWords::pFilterDirtyWords = NULL;CFilterDirtyWords::CFilterDirtyWords()
{m_phead = NULL;
}CFilterDirtyWords::~CFilterDirtyWords()
{releasedirtytree(m_phead);
}CFilterDirtyWords* CFilterDirtyWords::GetInstance()
{if(pFilterDirtyWords == NULL) {pFilterDirtyWords = new CFilterDirtyWords();}return pFilterDirtyWords;
}bool CFilterDirtyWords::LoadDirtyFile(const char* filepath)
{return loaddirtywords(filepath);
}bool CFilterDirtyWords::loaddirtywords(const char* filepath)
{FILE * f = fopen(filepath== nullptr? CONFIG_DIRTY_WORDS : filepath, "r");if (NULL == f) {return false;}char szbuf[256];PDIRTYTREE phead = NULL;while (NULL != fgets(szbuf, 256, f)) {insertdirtywords(phead, szbuf);}fclose(f);m_phead = phead;if (NULL == m_phead) {printf("CFilterDirtyWords::loaddirtywords is NULL"); return false;}return true;// return m_phead?true:false ;
}
void CFilterDirtyWords::filterdirtywords(const PDIRTYTREE pHead, char * pstring)
{if (!pHead) return;PDIRTYTREE pTree = pHead;unsigned char ch = '\0';int pos = 0;char * pTemp = pstring;bool bBegin = false;while (*pTemp != '\0'){ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;if (pTree->subtree[ch]) {if (!bBegin) {bBegin = true; pos = pTemp - pstring;}pTree = pTree->subtree[ch];if (pTree->bend) {while (pos <= pTemp - pstring) *(pstring + pos++) = '*';}}else if (bBegin && pHead->subtree[ch]) {pos = pTemp - pstring; pTree = pHead->subtree[ch];if (pTree->bend) {while (pos <= pTemp - pstring) *(pstring + pos++) = '*';}}else {pTree = pHead; bBegin = false;}++pTemp;}
}
/// Adds one word to the trie, creating the root on demand.
///
/// The word ends at the first NUL, '\r' or '\n', so a line read with fgets can
/// be passed directly. ASCII upper-case letters are stored in lower case.
///
/// @param pHead   root of the trie; allocated here when it is null.
/// @param pstring word to insert; ignored when null.
void CFilterDirtyWords::insertdirtywords(PDIRTYTREE& pHead, const char * pstring)
{
    if (!pstring) return;
    if (!pHead) pHead = new DIRTYTREE;

    PDIRTYTREE node = pHead;
    for (const char* cursor = pstring;
         *cursor != '\0' && *cursor != '\r' && *cursor != '\n';
         ++cursor) {
        // Index with the unsigned byte value; a plain char may be negative.
        unsigned char ch = static_cast<unsigned char>(*cursor);
        if (ch >= 'A' && ch <= 'Z') {
            ch = static_cast<unsigned char>(ch - 'A' + 'a');
        }

        if (!node->subtree[ch]) {
            node->subtree[ch] = new DIRTYTREE;
        }
        node = node->subtree[ch];
    }

    // An empty line leaves `node` at the root; the root must not be flagged
    // as the end of a word.
    if (node != pHead) {
        node->bend = true;
    }
}
void CFilterDirtyWords::releasedirtytree(PDIRTYTREE pHead)
{if (!pHead) return;for (unsigned int i = 0; i< 256; i++) {releasedirtytree(pHead->subtree[i]);}delete pHead;
}bool CFilterDirtyWords::hasdirtywords(const PDIRTYTREE pHead, const char * pstring)
{if (!pHead) return false;PDIRTYTREE pTree = pHead;unsigned char ch = '\0';char * pTemp = (char*)pstring;while (*pTemp != '\0'){ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;if (pTree->subtree[ch]) {pTree = pTree->subtree[ch];if (pTree->bend) {return true;}}else {pTree = pHead;}++pTemp;}return false;
}bool CFilterDirtyWords::HasDirtyWords(const char * pstring)
{return hasdirtywords(m_phead, pstring);
}void CFilterDirtyWords::FilterDirtyWords(char * pstring)
{filterdirtywords(m_phead, pstring);
}
main.cpp
#include "dirtywords.h"
#include <stdio.h>
int main() {if (CFilterDirtyWords::GetInstance()->LoadDirtyFile()) {printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("123")); //1printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("12")); //0}CFilterDirtyWords::GetInstance()->ReleaseByOwner() ;return 0;
}/*
dirtywords.txt 内容如下
132
123
121
1221
1121
*/
运行结果
2:golang code
dirtyword.go
package ditywordimport ("bufio""io""log""os""strings"
)//256 tree
type dirtytree struct {bend boolsubtree [256]*dirtytree
}var (dirtyhead *dirtytree = nil
)func loaddirtywords(filename string) bool {fi, err := os.Open(filename)if err != nil {log.Printf("filename=%v Error: %s\n", filename, err)return false}defer fi.Close()phead := new(dirtytree)br := bufio.NewReader(fi)for {a, _, c := br.ReadLine()if c == io.EOF {break}// log.Printf("a=%v \n",string(a))l := len(a)if l < 1 {continue}if l > 256 {a = a[:256]}//fmt.Println(string(a))insertdirtywords(phead, a)}dirtyhead = pheadreturn true
}func hasdirtywords(phead *dirtytree, str string) bool {if phead == nil {return false}var pTree *dirtytree = phead//log.Printf("cmp string=%#v \n",str)strlower := []byte(strings.ToLower(string(str)))l := len([]byte(strlower))if l < 1 {return false}//log.Printf("cmp ToLower string=%#v \n",string(strlower))for i := 0; i < l; i++ {ch := byte(strlower[i])if pTree.subtree[ch] != nil {pTree = pTree.subtree[ch]if pTree.bend {return true}} else {pTree = phead}}return false
}//func filterdirtywords(phead *dirtytree,str string) {//
//}func insertdirtywords(phead *dirtytree, str []byte) {//全部小写// log.Printf("org string=%#v \n",str)strlower := []byte(strings.ToLower(string(str)))l := len([]byte(strlower))if l < 1 {return}// log.Printf("org ToLower string=%#v \n",string(strlower))// log.Printf("org ToLower string=%#v \n",strlower)if phead == nil {phead = new(dirtytree)}pTree := pheadfor i := 0; i < l; i++ {ch := byte(strlower[i])if pTree.subtree[ch] == nil {pTree.subtree[ch] = new(dirtytree)pTree = pTree.subtree[ch]}}pTree.bend = true
}//func releasedirtytree(phead *dirtytree) {//}//api
func LoadDirtyWordsFile(filename string) bool {return loaddirtywords(filename)
}func HasDirtyWords(chstr string) bool {return hasdirtywords(dirtyhead, chstr)
}//func FilterDirtyWords(filterstr string) {//
//}
main.go
package mainimport ("bytes""dirtywords/dityword""fmt""github.com/henrylee2cn/mahonia""log""os""path""regexp""unicode/utf8"
)func check(src string) bool {str := "(?:')|(?:--)|(/\\*(?:.|[\\n\\r])*?\\*/)|(\b(select|update|and|or|delete|insert|trancate|char|chr|into|substr|ascii|declare|exec|count|master|into|drop|execute)\b)" //此处改为“re, err := regexp.Compile(str)if err != nil {fmt.Println(err.Error())return true}b := re.MatchString(src)fmt.Println("lllll", b) //打印出false。return b
}func main() {//1读取配置文件连cfgpath, _ := os.Getwd()filename := path.Join(cfgpath, "ditylist.txt")if !dityword.LoadDirtyWordsFile(filename) {os.Exit(1)}for {var input stringfmt.Scanln(&input)log.Printf("input=%v len=%v \n", input, len(input))if utf8.ValidString(input) {enc := mahonia.NewEncoder("gbk")gbkstr := enc.ConvertString(input)log.Printf("gbkstr=%v \n", []byte(gbkstr))b := dityword.HasDirtyWords(gbkstr)usrc := bytes.Runes([]byte(input))log.Printf("check b=%v uscr=%#v %v\n", b, usrc, len(usrc))// 2018/05/26 00:02:12 input=日 len=3// 2018/05/26 00:02:12 gbkstr=[200 213]// 2018/05/26 00:02:12 check b=true uscr=[]int32{26085} 1//r, size := utf8.DecodeRuneInString(input)//fmt.Printf("%c %v\n", r, size)// newdata := string(([]byte(input))[size:])// fmt.Printf("%c %v data=%v \n", r, size,newdata)//str = str[size:]// if data,num := utf8.DecodeRuneInString(input); ok {// b := dityword.HasDirtyWords(input)// fmt.Printf("check b=%v \n",b)// }}}}/*
ditylist.txt 内容如下
fyou
fky
fyou1
*/
目录结构及运行结果
3:完整工程如有需要,后续上传
如果觉得有用,麻烦点个赞,加个收藏
DirtyWordsFilter(脏字过滤)相关推荐
- 关键词过滤(脏字过滤)Trie Tree(Hash)和FastCheck两种过滤方式java版本
以前在做关键词或脏字过滤的时候都是使用的TrieTree,后来随便搜索发现了yeerh的这篇文章:http://www.cnblogs.com/yeerh/archive/2011/10/20/221 ...
- 脏字过滤android代码,asp.NET 脏字过滤算法 修改版
旧的算法是简单对每一个脏字调用一遍 string.replace,当然是用了StringBuilder.https://www.jb51.net/article/20575.htm.在我这里测试的时候 ...
- php 脏字,高效.NET脏字过滤算法与应用实例
本文实例讲述了高效.NET脏字过滤算法.分享给大家供大家参考,具体如下: BadWordsFilter.cs类using System; using System.Collections.Generi ...
- java脏字过滤_分享JavaWeb中filter过滤器的案例妙用 - 脏话过滤/编码过滤/代码过滤...
案例1. 利用Servlet的过滤器Filter进行完成脏话过滤 package cn.javabs.filter; import java.io.IOException; import javax. ...
- java脏字过滤方法
public class DirtyFilter { private int rs = -1; public String[] dirtyArr = {};//脏字数组 public boolea ...
- java脏字过滤_java 过滤脏字
public class DirtyFilter { private int rs = -1; public String[] dirtyArr = {};//脏字数组 public boolea ...
- java脏字过滤_脏字过滤
1.[文件] SensitiveWordFilter.java ~ 7KB 下载(141) package com.forgov.sharpc.infrastruture.util; import s ...
- NET脏字过滤算法 收藏
方法一:使用正则表达式 1//脏字典数据存放文件路径 2 private static string FILE_NAME="zang.txt"; 3 ...
- excel下拉列表数据筛选_从筛选的Excel列表中下拉
excel下拉列表数据筛选 Someone asked me how to make a data validation drop down that only shows the visible r ...
最新文章
- 使用pinchzoom实现头像剪裁
- 结婚和年龄有直接关系吗?
- 如何自学python爬虫-小白如何快速学习Python爬虫?
- 019_with语句
- 在Windows系统中安装WAMP
- 【CLRS】《算法导论》读书笔记(一):堆排序(Heapsort)
- iPhone 路径大全
- SwiftTour基础学习(五)控制流
- spring @Autowired注入map
- Atitit db query op shourt code lib list 数据库查询最佳实践 JdbcTemplate spring v2 u77 .docx Atitit db query o
- Android Wear缺少本质上的创新
- 【图像融合】基于matalb小波变换(加权平均法+局域能量+区域方差匹配)图像融合【含Matlab源码 1819期】
- minitab学习系列(1)--二项式分布过程能力分析
- asp.net MVC使用treegrid——jqwidgets插件
- alook浏览器_Alook浏览器app下载|Alook浏览器安卓版下载 v3.4.1 - 跑跑车安卓网
- 100baseT、100baseFX、1000base-SX、100/1000base-T
- 关于学校邮箱收不到matlab验证短信
- flutter 吸顶效果
- Android AOP 编程实践 javapoet + autoService
- echarts 雷达图
热门文章
- 贝塞尔曲线-曲线拟合
- RuntimeError: Expected object of type torch.cuda.LongTensor but found type torch.cuda.IntTensor
- one coin,oneworld
- C++ Copy Elision 1
- 利用NEO与Unity制作游戏(第1部分)
- 网络基础 — IP地址和子网掩码
- 女生转行软件测试难吗?
- 微盟集团上半年业绩逆势增长:抢滩在线新经济未来前景获看好
- allegro artwork设置和颜色设置的导入导出
- 网络安全工程师年薪百万?到底是干什么的?