使用动态规划算法实现文献查重（C/C++实现）

本文介绍如何使用动态规划算法实现文献查重，并给出完整的 C/C++ 实现程序。

#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <string.h>
#include <sstream>
#include <cstdlib>
#include <fstream>
#include <cassert>
#include <algorithm>
#include <math.h>
#include <iterator>
#include <cctype>

// Capacity limits of the fixed-size global tables below.
static const int MAX_SENTENCES = 1000;   // sentences kept per input file
static const int MAX_WORDS = 1000;       // words compared per sentence
static const float GAP_PENALTY = 0.5f;   // cost of leaving one word unaligned

// One normalized sentence read from an input file.
struct Sentence
{
    std::string sentence;  // lower-case words separated by single spaces
    int num;               // number of words in `sentence`
};

// Best match found for one sentence of the text file.
struct sameSentence
{
    std::string libsent;   // library sentence with the highest alignment score
    std::string textsent;  // sentence of the text file that was compared
    int textnum;           // number of words in `textsent`
    int samenum;           // best alignment score, truncated to an integer
};

sameSentence saveall[MAX_SENTENCES];        // one result per text sentence
Sentence libSentence[MAX_SENTENCES];        // sentences of the library file
int wordSimilarity[MAX_WORDS][MAX_WORDS];   // 1 where text word k equals library word j, else 0
float Score[MAX_WORDS][MAX_WORDS];          // dynamic-programming alignment scores
std::string str1[MAX_WORDS], str2[MAX_WORDS];  // words of the current text / library sentence
int libline = 0;     // number of sentences stored in libSentence
int testline = 0;    // number of results stored in saveall

void libtxtToSentence( std::string file );  // load the library file sentence by sentence
void textToSentence( std::string file );    // read the text file and score each sentence
float max( float a, float b );
float maxthree( float a, float b, float c );

// True for the ASCII letters a-z and A-Z only.
static bool isAsciiLetter( char c )
{
    return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
}

// Opens `path` on `in`; prints a message and terminates the program on failure.
// An explicit check is used instead of assert() so that it also works when
// the program is built with NDEBUG.
static void openInput( std::ifstream& in, const std::string& path )
{
    in.open( path.c_str() );
    if ( !in.is_open() )
    {
        std::cerr << "cannot open input file: " << path << '\n';
        std::exit( EXIT_FAILURE );
    }
}

// Reads the next non-empty sentence from `in` into `out`.
//
// Letters are lower-cased and appended; '.', '?' and '!' end a sentence; any
// other character acts as a word separator, and runs of separators collapse
// into a single space. Sentences without any letters (for example the extra
// dots of "...") are skipped. Text after the last terminator is discarded.
//
// Returns true when a sentence was stored in `out`, false at end of input.
static bool readSentence( std::istream& in, Sentence& out )
{
    std::string text;
    char ch;
    // Testing the result of get() avoids handling the last character twice,
    // which a `while (!in.eof())` loop would do.
    while ( in.get( ch ) )
    {
        if ( isAsciiLetter( ch ) )
        {
            text += static_cast<char>( std::tolower( static_cast<unsigned char>( ch ) ) );
        }
        else if ( ch == '.' || ch == '?' || ch == '!' )
        {
            // Drop a trailing separator so the word count below is exact.
            if ( !text.empty() && text[text.size() - 1] == ' ' )
                text.erase( text.size() - 1 );
            if ( text.empty() )
                continue;
            out.sentence = text;
            out.num = static_cast<int>( std::count( text.begin(), text.end(), ' ' ) ) + 1;
            return true;
        }
        else if ( !text.empty() && text[text.size() - 1] != ' ' )
        {
            text += ' ';
        }
    }
    return false;
}

// Splits `sentence` on whitespace into `words`, storing at most `maxWords`
// entries. Returns the number of words stored.
static int splitWords( const std::string& sentence, std::string words[], int maxWords )
{
    std::istringstream stream( sentence );
    std::string word;
    int count = 0;
    while ( count < maxWords && stream >> word )
        words[count++] = word;
    return count;
}

// Scores the alignment of the words in str1[0..textCount) against the words
// in str2[0..libCount), using the global wordSimilarity and Score tables.
//
// A matching pair of words adds 1, a mismatching pair adds 0, and every word
// left unaligned costs GAP_PENALTY. Returns the score of aligning both full
// sentences, truncated to an integer, or 0 when either sentence is empty.
static int alignmentScore( int textCount, int libCount )
{
    if ( textCount <= 0 || libCount <= 0 )
        return 0;

    for ( int k = 0; k < textCount; k++ )
        for ( int j = 0; j < libCount; j++ )
            wordSimilarity[k][j] = ( str1[k] == str2[j] ) ? 1 : 0;

    // Borders: start from cell (0,0) so that no index -1 is ever read.
    // Pairing word k with the first word of the other sentence leaves the k
    // words before it unaligned.
    Score[0][0] = static_cast<float>( wordSimilarity[0][0] );
    for ( int k = 1; k < textCount; k++ )
        Score[k][0] = max( Score[k - 1][0] - GAP_PENALTY, wordSimilarity[k][0] - GAP_PENALTY * k );
    for ( int j = 1; j < libCount; j++ )
        Score[0][j] = max( Score[0][j - 1] - GAP_PENALTY, wordSimilarity[0][j] - GAP_PENALTY * j );

    for ( int k = 1; k < textCount; k++ )
        for ( int j = 1; j < libCount; j++ )
            Score[k][j] = maxthree( Score[k - 1][j] - GAP_PENALTY,
                                    Score[k][j - 1] - GAP_PENALTY,
                                    Score[k - 1][j - 1] + wordSimilarity[k][j] );

    return static_cast<int>( Score[textCount - 1][libCount - 1] );
}

// Usage: program [libfile [textfile]]
// Without arguments the original default paths are used.
int main( int argc, char* argv[] )
{
    const std::string libfile = ( argc > 1 ) ? argv[1] : "D://lib.txt";
    const std::string textfile = ( argc > 2 ) ? argv[2] : "D://text.txt";

    libtxtToSentence( libfile );
    textToSentence( textfile );

    for ( int j = 0; j < testline; j++ )
    {
        std::cout << "第" << j + 1 << "句对比" << '\n';
        std::cout << saveall[j].textsent << '\n';
        std::cout << saveall[j].libsent << '\n';
        std::cout << saveall[j].samenum << "\t" << saveall[j].textnum << '\n';
        std::cout << "\n" << '\n';
    }
    return 0;
}

// Loads the library file into libSentence[], one entry per sentence, and
// advances libline accordingly. Sentences beyond MAX_SENTENCES are ignored.
// Terminates the program if the file cannot be opened.
void libtxtToSentence( std::string file )
{
    std::ifstream infile;   // closed automatically when it goes out of scope
    openInput( infile, file );

    Sentence current;
    while ( libline < MAX_SENTENCES && readSentence( infile, current ) )
    {
        libSentence[libline] = current;
        ++libline;
    }
}

// Reads the text file sentence by sentence, scores every sentence against
// all library sentences and records the best match in saveall[], advancing
// testline accordingly. Sentences beyond MAX_SENTENCES are ignored.
// Terminates the program if the file cannot be opened.
void textToSentence( std::string file )
{
    std::ifstream infile;   // closed automatically when it goes out of scope
    openInput( infile, file );

    Sentence current;
    while ( testline < MAX_SENTENCES && readSentence( infile, current ) )
    {
        // Always record the text sentence, even if nothing in the library
        // scores above zero, so the report never shows a blank entry.
        sameSentence& best = saveall[testline];
        best.textsent = current.sentence;
        best.textnum = current.num;
        best.libsent = "";
        best.samenum = 0;

        // The text sentence is split once, not once per library sentence.
        const int textCount = splitWords( current.sentence, str1, MAX_WORDS );

        for ( int i = 0; i < libline; i++ )   // every library sentence, including the last
        {
            const int libCount = splitWords( libSentence[i].sentence, str2, MAX_WORDS );
            const int score = alignmentScore( textCount, libCount );
            if ( best.samenum < score )
            {
                best.libsent = libSentence[i].sentence;
                best.samenum = score;
            }
        }
        ++testline;
    }
}

// Returns the larger of a and b (b when they are equal).
float max( float a, float b )
{
    if ( a > b )
        return a;
    else
        return b;
}

// Returns the largest of a, b and c.
float maxthree( float a, float b, float c )
{
    float temp = a;
    if ( temp < b )
        temp = b;
    if ( temp < c )
        temp = c;
    return temp;
}

加企鹅2844365449一起学习技术，讨论问题。

文件文本示例：lib.txt

A recursive algorithm is one that invokes makes reference to itself repeatedly until a certain condition also known as termination co......

text.txt

In mathematics and computer science, an algorithm is a self contained step by step set of operations to be performed. Algorithms

使用动态规划算法实现文献查重（C/C++实现）相关推荐

本科毕业论文去哪里查重比较靠谱？
毕业在即,论文好不容易写完,查重又成了难题.尤其天临元年以来,查重率越降越低,小伙伴人心重重. 学校给的免费查重次数远远不够用怎么办?查重只用一个网站不放心怎么办?大家放宽心,下面我给大家介绍几个好用 ...
刚刚！知网开放个人查重服务，研究生学位论文3次免费，网友吐糟：「毕业了才开放」...
来源:光明日报 6月12日零点,同方知网(北京)技术有限公司发布<公告>:即日起,中国知网向个人用户直接提供查重服务.请认准https://cx.cnki.net为中国知网个人查重服务唯一 ...
opencv图像配准_Milvus 实战 | 基于 Milvus 的图像查重系统
背景介绍由于巨大的利益,论文造假屡见不鲜,在部分国家或地区甚至形成了论文造假的产业链.目前大部分论文查重系统只能检查论文文字,不能检查图片.因此,论文图片查重已然成为了学术论文原创性检测的重要部分. ...
刷课会被检测出来吗?_知网查重系统检测出来的结果准确吗？
绝大多数高校都是使用知网论文检测系统,尤其是硕士论文的检测.将近90%的高校都是使用知网检测系统软件.那知网检测出来的重复率准确吗? 很多研究生在毕业前都会忙着处理即将离校的事情,但他们又不得不为自己 ...
个人项目-论文查重/3120005470
文章目录 1.作业的基本信息 2.作业地址 3.PSP表格 4.算法的设计与实现过程 4.1 分词 4.2 hash 4.3 加权 4.4 合并 4.5 降维 4.6 通过海明距离计算simhash的 ...
Milvus 实战 | 基于 Milvus 的图像查重系统
背景介绍由于巨大的利益,论文造假屡见不鲜,在部分国家或地区甚至形成了论文造假的产业链.目前大部分论文查重系统只能检查论文文字,不能检查图片.因此,论文图片查重已然成为了学术论文原创性检测的重要部 ...
python知网查重_用Python写了个检测抄袭/文章去重算法（nshash）
中国人有句话叫"天下文章一大抄",但是在正规场合下"抄"是要付出代价的,比如考试.写论文是不能抄的,一旦被发现后果相当严重.在互联网出现之前,"抄" ...
kcf算法中cos_window是什么意思_知网/维普查重系统算法介绍（史上最详细）
很多同学在降重的过程中经常是面对一大片标红的文字不知所措,无从下手,甚至修改了两三遍了重复率依然还是没有变,更过分的是有的居然不降反升,这就尴尬了.那么这就要求大家对各个查重系统的规则和算法有一定的了 ...
论文的文献综述查重吗？
也许有些人不知道什么是论文综述是什么,写论文时不需要写论文的文献综述,写论文文献综述时需要注意什么,论文的文献综述需要查重吗?接下来,让小编来谈谈论文献综述查重的问题. 论文文献综述实际上是论文参考文 ...

使用动态规划算法实现文献查重（C/C++实现）

加企鹅2844365449一起学习技术，讨论问题。

文件文本示例：lib.txt

使用动态规划算法实现文献查重（C/C++实现）相关推荐

最新文章

热门文章