最近亲眼看到快播被大批警察包围,感觉到快播注定要关闭很多东西,很多宅男宅女们又要寂寞了,于是乎,疯狂的研究DHT网络技术

看到网上也有开源的代码,这不,我拿来进行了二次重写,呵呵,上代码:

#encoding: utf-8
import socket
from binascii import hexlify
from bisect import bisect_left
from datetime import *
from hashlib import sha1
from random import randint
from socket import inet_aton, inet_ntoa
from struct import unpack, pack
from threading import Timer
from time import sleep

import MySQLdb
# NOTE: this import must stay after `from datetime import *`, which also
# exports a name called `time`; the module is the one the script needs.
import time

from bencode import bencode, bdecode

# Public DHT routers that Client.joinDHT() queries to enter the network.
BOOTSTRAP_NODES = [
    ("router.bittorrent.com", 6881),
    ("dht.transmissionbt.com", 6881),
    ("router.utorrent.com", 6881),
]
TID_LENGTH = 4  # bytes of random data used as the "t" transaction id of a query
KRPC_TIMEOUT = 10  # seconds between runs of Client.timeout()
REBORN_TIME = 5 * 60  # seconds between runs of Client.reborn()
K = 8  # maximum number of nodes a single KBucket may hold


def entropy(bytes):
    """Return a byte string made of `bytes` random bytes."""
    # pack("B", ...) yields a one-byte byte string on both Python 2 and 3.
    return b"".join(pack("B", randint(0, 255)) for _ in range(bytes))


def random_id():
    """Return a random 20-byte node id (SHA-1 digest of 20 random bytes)."""
    return sha1(entropy(20)).digest()


def decode_nodes(nodes):
    """Decode a compact node list into ``(nid, ip, port)`` tuples.

    Each entry is 26 bytes: a 20-byte node id, a 4-byte IPv4 address and a
    2-byte big-endian port.  An input whose length is not a multiple of 26
    is rejected as a whole and an empty list is returned.
    """
    n = []
    length = len(nodes)
    if (length % 26) != 0:
        return n
    for i in range(0, length, 26):
        nid = nodes[i:i + 20]
        ip = inet_ntoa(nodes[i + 20:i + 24])
        port = unpack("!H", nodes[i + 24:i + 26])[0]
        n.append((nid, ip, port))
    return n


def encode_nodes(nodes):
    """Encode objects exposing ``nid``/``ip``/``port`` into a compact node list.

    This is the inverse of decode_nodes().
    """
    # Plain concatenation keeps the data as raw bytes; "%s" formatting would
    # embed the repr of a bytes object on Python 3.
    return b"".join(
        node.nid + inet_aton(node.ip) + pack("!H", node.port)
        for node in nodes
    )


def intify(hstr):
    """Interpret the byte string `hstr` as a big-endian unsigned integer."""
    return int(hexlify(hstr), 16)


def timer(t, f):
    """Call `f` once, `t` seconds from now, on a background thread."""
    worker = Timer(t, f)
    # The callbacks re-arm themselves forever; daemon threads make sure they
    # do not keep the process alive after the main thread has exited.
    worker.daemon = True
    worker.start()


class BucketFull(Exception):
    """Raised by KBucket.append() when the bucket already holds K nodes."""


class KRPC(object):
    """UDP transport that sends and dispatches bencoded messages.

    Subclasses must set ``self.port`` before calling ``KRPC.__init__`` and
    must provide ``find_node_handler`` plus the four ``*_received`` query
    handlers referenced in ``self.actions``.
    """

    def __init__(self):
        # Dispatch on the message type ("y" key): response or query.
        self.types = {
            "r": self.response_received,
            "q": self.query_received,
        }
        # Dispatch on the query name ("q" key).
        self.actions = {
            "ping": self.ping_received,
            "find_node": self.find_node_received,
            "get_peers": self.get_peers_received,
            "announce_peer": self.announce_peer_received,
        }
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.socket.bind(("0.0.0.0", self.port))

    def response_received(self, msg, address):
        """Handle a response; every response is treated as a find_node reply."""
        self.find_node_handler(msg)

    def query_received(self, msg, address):
        """Route a query to its handler; unknown or missing query names are ignored."""
        try:
            handler = self.actions[msg["q"]]
        except KeyError:
            return
        handler(msg, address)

    def send_krpc(self, msg, address):
        """Bencode `msg` and send it to `address`, ignoring send failures."""
        try:
            self.socket.sendto(bencode(msg), address)
        except Exception:
            # Best effort: an unreachable peer or an unencodable message must
            # not stop the crawler.  KeyboardInterrupt is no longer swallowed.
            pass


class Client(KRPC):
    """Active side of the crawler: issues find_node queries to discover nodes."""

    def __init__(self, table):
        self.table = table
        timer(KRPC_TIMEOUT, self.timeout)
        timer(REBORN_TIME, self.reborn)
        KRPC.__init__(self)

    def find_node(self, address, nid=None):
        """Send a find_node query for a random target to `address`.

        When `nid` (the id of the node being contacted) is given, the query
        is sent under an id that shares its first 10 bytes; otherwise the
        table's own id is used.
        """
        nid = self.get_neighbor(nid) if nid else self.table.nid
        tid = entropy(TID_LENGTH)
        msg = {
            "t": tid,
            "y": "q",
            "q": "find_node",
            "a": {"id": nid, "target": random_id()},
        }
        self.send_krpc(msg, address)

    def find_node_handler(self, msg):
        """Send a find_node query to every node listed in a response."""
        try:
            nodes = decode_nodes(msg["r"]["nodes"])
            for node in nodes:
                (nid, ip, port) = node
                if len(nid) != 20:
                    continue
                if nid == self.table.nid:
                    continue
                self.find_node((ip, port), nid)
        except KeyError:
            # Response without "r" or "nodes": nothing to follow up on.
            pass

    def joinDHT(self):
        """Query every bootstrap router."""
        for address in BOOTSTRAP_NODES:
            self.find_node(address)

    def timeout(self):
        """Periodic task: re-bootstrap while the table has fewer than two buckets."""
        if len(self.table.buckets) < 2:
            self.joinDHT()
        timer(KRPC_TIMEOUT, self.timeout)

    def reborn(self):
        """Periodic task: pick a fresh node id and reset the routing table."""
        self.table.nid = random_id()
        self.table.buckets = [KBucket(0, 2**160)]
        timer(REBORN_TIME, self.reborn)

    def start(self):
        """Bootstrap, then receive and dispatch datagrams forever."""
        self.joinDHT()
        while True:
            try:
                (data, address) = self.socket.recvfrom(65536)
                msg = bdecode(data)
                self.types[msg["y"]](msg, address)
            except Exception:
                # Malformed or unexpected packets are dropped on purpose so a
                # single bad datagram cannot stop the receive loop.
                pass

    def get_neighbor(self, target):
        """Return an id sharing its first 10 bytes with `target`, random after that."""
        return target[:10] + random_id()[10:]


class Server(Client):
    """Passive side of the crawler: answers queries and reports infohashes.

    Every reply is sent under an id derived from the requester's id or from
    the requested target (see Client.get_neighbor).  Infohashes seen in
    get_peers and announce_peer queries are passed to ``master.log``.
    """

    def __init__(self, master, table, port):
        self.table = table
        self.master = master
        self.port = port  # must be set before KRPC.__init__ binds the socket
        Client.__init__(self, table)

    def ping_received(self, msg, address):
        """Answer a ping, then query the sender with find_node."""
        try:
            nid = msg["a"]["id"]
            msg = {
                "t": msg["t"],
                "y": "r",
                "r": {"id": self.get_neighbor(nid)},
            }
            self.send_krpc(msg, address)
            self.find_node(address, nid)
        except KeyError:
            pass

    def find_node_received(self, msg, address):
        """Answer find_node with the closest known nodes and remember the sender."""
        try:
            target = msg["a"]["target"]
            neighbors = self.table.get_neighbors(target)
            nid = msg["a"]["id"]
            msg = {
                "t": msg["t"],
                "y": "r",
                "r": {
                    "id": self.get_neighbor(target),
                    "nodes": encode_nodes(neighbors),
                },
            }
            self.table.append(KNode(nid, *address))
            self.send_krpc(msg, address)
            self.find_node(address, nid)
        except KeyError:
            pass

    def get_peers_received(self, msg, address):
        """Answer get_peers with nodes only and log the requested infohash."""
        try:
            infohash = msg["a"]["info_hash"]
            neighbors = self.table.get_neighbors(infohash)
            nid = msg["a"]["id"]
            msg = {
                "t": msg["t"],
                "y": "r",
                "r": {
                    "id": self.get_neighbor(infohash),
                    "nodes": encode_nodes(neighbors),
                },
            }
            self.table.append(KNode(nid, *address))
            self.send_krpc(msg, address)
            self.master.log(infohash)
            self.find_node(address, nid)
        except KeyError:
            pass

    def announce_peer_received(self, msg, address):
        """Acknowledge announce_peer and log the announced infohash."""
        try:
            infohash = msg["a"]["info_hash"]
            nid = msg["a"]["id"]
            msg = {
                "t": msg["t"],
                "y": "r",
                "r": {"id": self.get_neighbor(infohash)},
            }
            self.table.append(KNode(nid, *address))
            self.send_krpc(msg, address)
            self.master.log(infohash)
            self.find_node(address, nid)
        except KeyError:
            pass


class KTable(object):
    """Routing table: an ordered list of KBuckets covering [0, 2**160)."""

    def __init__(self, nid):
        self.nid = nid
        self.buckets = [KBucket(0, 2**160)]

    def append(self, node):
        """Insert `node`, splitting a full bucket only if it covers our own id.

        A node that falls into a full bucket not containing ``self.nid`` is
        silently dropped.
        """
        index = self.bucket_index(node.nid)
        try:
            bucket = self.buckets[index]
            bucket.append(node)
        except IndexError:
            return
        except BucketFull:
            if not bucket.in_range(self.nid):
                return
            self.split_bucket(index)
            # Retry: the node now lands in one of the two halves.
            self.append(node)

    def get_neighbors(self, target):
        """Return up to K known nodes closest (by XOR distance) to `target`.

        Returns an empty list when `target` is not 20 bytes long.
        """
        nodes = []
        if len(self.buckets) == 0:
            return nodes
        if len(target) != 20:
            return nodes
        index = self.bucket_index(target)
        try:
            # Work on a copy: extending/sorting the bucket's own list would
            # leak nodes from neighbouring buckets into it.
            nodes = list(self.buckets[index].nodes)
            lower = index - 1
            upper = index + 1
            # Widen the search one bucket at a time on both sides until
            # enough candidates are collected or the table is exhausted.
            while len(nodes) < K and (lower >= 0 or upper < len(self.buckets)):
                if lower >= 0:
                    nodes.extend(self.buckets[lower].nodes)
                if upper < len(self.buckets):
                    nodes.extend(self.buckets[upper].nodes)
                lower -= 1
                upper += 1
            num = intify(target)
            nodes.sort(key=lambda node: num ^ intify(node.nid))
            return nodes[:K]
        except IndexError:
            return nodes

    def bucket_index(self, target):
        """Return the index of the bucket whose range contains `target`."""
        # Relies on KBucket.__lt__, which compares a bucket's upper bound
        # against an integer.
        return bisect_left(self.buckets, intify(target))

    def split_bucket(self, index):
        """Split bucket `index` in half and redistribute its nodes."""
        old = self.buckets[index]
        # Floor division keeps the midpoint an exact integer; true division
        # would round 160-bit values through a float.
        point = old.max - (old.max - old.min) // 2
        new = KBucket(point, old.max)
        old.max = point
        self.buckets.insert(index + 1, new)
        for node in old.nodes[:]:
            if new.in_range(node.nid):
                new.append(node)
                old.remove(node)

    def __iter__(self):
        for bucket in self.buckets:
            yield bucket


class KBucket(object):
    """Holds at most K nodes whose ids fall in the half-open range [min, max)."""

    __slots__ = ("min", "max", "nodes")

    def __init__(self, min, max):
        self.min = min
        self.max = max
        self.nodes = []

    def append(self, node):
        """Add `node`, or move it to the end of the list if already present.

        Raises:
            BucketFull: the node is new and the bucket already holds K nodes.
        """
        if node in self:
            self.remove(node)
            self.nodes.append(node)
        elif len(self) < K:
            self.nodes.append(node)
        else:
            raise BucketFull

    def remove(self, node):
        self.nodes.remove(node)

    def in_range(self, target):
        """Return True if the id `target` (a byte string) belongs to this bucket."""
        return self.min <= intify(target) < self.max

    def __len__(self):
        return len(self.nodes)

    def __contains__(self, node):
        return node in self.nodes

    def __iter__(self):
        for node in self.nodes:
            yield node

    def __lt__(self, target):
        # "bucket < integer" means the whole bucket lies below that integer;
        # this is what lets bisect_left() locate a bucket by id.
        return self.max <= target


class KNode(object):
    """A remote DHT node; two nodes are equal when their ids are equal."""

    __slots__ = ("nid", "ip", "port")

    def __init__(self, nid, ip, port):
        self.nid = nid
        self.ip = ip
        self.port = port

    def __eq__(self, other):
        return self.nid == other.nid


# Usage example
class Master(object):
    """Sink for crawled infohashes: one MySQL row and one log-file line each.

    `f` is an open, writable text file.  If the database connection cannot
    be opened, the error is printed and only the file is written.
    """

    def __init__(self, f):
        self.f = f
        # Defined up front so log() can tell whether a connection exists.
        self.conn = None
        self.cur = None
        try:
            self.conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='bt', port=3306)
            self.cur = self.conn.cursor()
        except MySQLdb.Error as e:
            print("Mysql Error %d: %s" % (e.args[0], e.args[1]))

    def log(self, infohash):
        """Record `infohash` (a raw byte string) in the database and the log file."""
        if self.cur is not None:
            try:
                sql = "insert into bt_main_new(hash,name,length,date) values(%s,%s,%s,%s)"
                now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                # NOTE(review): the raw bytes are stored in the `hash` column
                # while the log file gets the hex form - confirm which one
                # the table expects.
                self.cur.execute(sql, (infohash, '', '', now))
                self.conn.commit()
                # The cursor and connection stay open: log() is called once
                # per received infohash, so closing them here would make
                # every later insert fail.
            except MySQLdb.Error as e:
                print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        self.f.write(hexlify(infohash).decode("ascii") + "\n")
        self.f.flush()
# Entry point of the crawler: log infohashes to "<today>.log" and to MySQL
# until interrupted with Ctrl-C.
d = date.today()
f = open("%s.log" % d, "a")
s = None
try:
    # A single Master is shared with the server, so only one database
    # connection is opened.
    m = Master(f)
    s = Server(m, KTable(random_id()), 8006)
    s.start()
except KeyboardInterrupt:
    pass
finally:
    # `s` is still None when Server() itself failed (e.g. the port is taken).
    if s is not None:
        s.socket.close()
    f.close()

本爬虫程序,会自动爬取网络上分享的bt种子,写入文件和数据库;爬取到的只是种子的hash码,还需要到网络上下载种子进行分析

下载种子,相信大家都知道国外有几个免费分享种子的网站,大家可以根据hash码去下载,分析,下面呈上我写的一个分析种子的程序:

#! /usr/bin/python
# -*- coding: utf-8 -*-
# Fill in the name and length of every bt_main row that has no name yet, using
# the torrent metadata served by the info.php endpoint for the row's hash.
import MySQLdb
from datetime import *
import time
import re
from time import sleep
import bencode
import urllib2
import base64

conn = None
cur = None
try:
    conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='bt', port=3306)
    cur = conn.cursor()
    sql = "select * from bt_main where name = '' order by id desc"
    cur.execute(sql)
    rows = cur.fetchall()
    for row in rows:
        # presumably columns are (id, hash, name, ...) - verify against the schema
        if row[2].strip() != '':
            continue
        row_id = row[0]
        infohash = row[1]
        url = "http://haofuli.duapp.com/go/info.php?hash=%s" % infohash
        try:
            payload = urllib2.urlopen(url).read()
        except urllib2.URLError as e:
            # One unreachable torrent must not abort the whole run.
            print("Download Error: %s" % e)
            continue
        if "error!" == payload:
            # The endpoint has no torrent for this hash: flag the row.
            try:
                sql = "update bt_main set isTrue = 0 where id = %s "
                cur.execute(sql, (row_id,))
                conn.commit()
            except MySQLdb.Error as e:
                print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # decode
            try:
                info = bencode.bdecode(payload)['info']
            except Exception:
                # Not a usable torrent: skip the row instead of falling
                # through with the metadata of a previously decoded one.
                continue
            if 'name.utf-8' in info:
                filename = info['name.utf-8']
            else:
                filename = info['name']
            # length (torrents without a top-level "length" are stored as 0)
            if "length" in info:
                length = info['length']
            else:
                length = 0
            try:
                sql = "update bt_main set name = %s , length = %s , isTrue = 1 where id = %s"
                cur.execute(sql, (base64.b64encode(filename), length, row_id))
                conn.commit()
            except MySQLdb.Error as e:
                print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
finally:
    if cur is not None:
        cur.close()
    if conn is not None:
        conn.close()

上面的只是简单的分析,对于多文件的,还没有处理。我最近在解析种子的时候,总是出现莫名的填充文件的问题,可能是版本过低吧,最近仍旧在解决。

BT种子站,本人用PHP做了一个BT种子站,域名在此就不说啦哈,csdn不让写。大家可以回复向我索取域名哈。

bt.dianfenxiang.com

bt种子爬虫程序和种子解析(大蟒蛇语言编写)相关推荐

  1. python语言与蟒蛇_1、python语言是一种“大蟒蛇语言‘’,但是python语言却和蟒蛇没有任何关系_学小易找答案...

    [单选题]最早的显微镜是有谁发明的?( ) [简答题]已知斜度,抄画图形. [简答题]什么是程序设计? [简答题]简述承保年度制满期赔付率涵义 [单选题]筹集股权资本是企业筹集( )的一种重要方式. ...

  2. 计算机编写的程序具有可移植性,用高级程序设计语言编写的程序()。A.计算机能直接执行B.可读性和可移植性好C.可读性差但执行效率...

    用高级程序设计语言编写的程序().A.计算机能直接执行B.可读性和可移植性好C.可读性差但执行效率 更多相关问题 有些消费函数表明,收入和消费是负相关的.() 地球上陆地的面积约为148 000 00 ...

  3. C语言编写程序输出循环字母,用C语言编写一个程序,从键盘上输入一个小写字母,将其本身及对应的大写字母输出....

    程序如下: #include int main() { char ch; scanf("%c",&ch); printf("%c",ch-'a'+'A' ...

  4. 解析数据帧 c语言编写

    #include "stdio.h" #include <stdlib.h> FILE *fp; FILE *fp1; // 头文件#include <stdio ...

  5. c语言简单收银系统程序编码,我有个C语言编写的超市收银系统,请高手修改下,帮做流程图,可追加分...

    我有个C语言编写的超市收银系统,请高手修改下,帮做流程图,可追加分0 #includestruct goods{ char no[8];3char name[20];  float price;  i ...

  6. “大蟒蛇”的养殖教程---“字符串”

    今天小编就给各位程序小白介绍一下大蟒蛇吧! Python,又叫"大蟒蛇",是21世纪较多猿猿喜欢的宠物,猿猿们可以利用"大蟒蛇"做很多事,例如现在流行的AI(人 ...

  7. 大蟒蛇:Python入门课程主要讲什么?

    Python用中文表达的意思就是大蟒蛇,那么Python入门课程主要有什么? Python基础入门课程包含的内容:变量,运算符,输入输出和条件以及循环语句等知识点,认识Python,学习第一个Pyth ...

  8. c语言外卖程序,课内资源 - 基于C语言的外卖派单模拟系统

    一.课题任务概述 你运行一家外卖快递服务店,负责一个区域内的外卖订单接收和餐食快递.你有一笔启动资金,可以招募外卖骑手帮你送餐,来赚取快递费.但你也会面临风险,本区域的订单你都有义务接收,不能拒绝,若 ...

  9. python爬虫torrentkitty的种子

    python爬虫torrentkitty的种子 话不多说上源代码,只要把lxml的库安装下就好了 这个程序完全是解放双手,而且没有弹窗网页等困扰 __author__ = 'JianqingJiang ...

  10. Python大作业-网络爬虫程序

    简介 此程序是本人大三时期的Python大作业,初学Python后所编写的一个程序,是一个网络爬虫程序,可爬取指定网站的信息. 本程序爬取的网站是Bangumi-我看过的动画,Bangumi是一个专注 ...

最新文章

  1. DELL服务器结合nagios硬件监控、报警
  2. .NET 2.0防止同用户同时登陆实例
  3. 虫师python appium自动化测试书_Appium移动自动化测试(一)--安装Appium
  4. 华为手机asph啥机型_华为正式宣布!19款机型开启新系统内测,你的手机榜首有名吗?...
  5. 昂贵的聘礼 poj 1062 dijsk
  6. 厉害了!阿里安全图灵实验室在ICDAR2017 MLT竞赛刷新世界最好成绩
  7. vue+element【后台案例 · 搜集 · 集锦】
  8. 神经网络隐藏层个数怎么确定_含有一个隐藏层的神经网络对平面数据分类python实现(吴恩达深度学习课程1第3周作业)...
  9. DataBase -- Operator
  10. 追逐鼠标光标的好奇小猫咪
  11. phpMyAdmin创建数据库无权限解决方案
  12. steam授权_听歌、看番、学习甚至开车...steam好像忘了自己是个游戏平台
  13. 开个怀旧零食店很有前景!
  14. android studio 中要在app名称中添加特殊符号
  15. 【Unity3D开发小游戏】Unity3D开发《3D迷宫》小游戏
  16. Recoil的简单实践和思考
  17. 创意库|Photoshop超跑汽车海报设计实例
  18. 蓝桥杯第四届C/C++ B省赛题目及题解
  19. LaTeX入门|(2)定制专属模板
  20. CSS3 border-radius 属性

热门文章

  1. JWT-JSON WEB TOKEN使用详解及注意事项
  2. 编译原理(第3版) 清华大学出版社 黄贤英等人著作 课程知识点总结
  3. vue的json格式化工具json-viewer
  4. CRC校验 与 模2运算
  5. php 降低视频分辨率,怎么看视频的分辨率 如何改变视频分辨率|视频分辨率修改器...
  6. php url伪静态,PHP url伪静态
  7. 用proteus进行一位全加器/减法器的设计与验证
  8. coreELEC ceemmc 写入 cm311-1a 刷机排坑 Armbian 搞机篇
  9. windows如何离线打补丁安全需知
  10. 【转】只有运用你的逻辑才能看懂其中的恐怖