网站子域名扫描程序，采用三种扫描方式（crt网站查询、站长网站查询、字典暴力破解）

总体文件结构：
1个主文件common.py，
1个全局配置文件config.py，
1个随机获取User-Agent头的文件user_agent_list.py，
暴力破解方式文件：brute_check.py,brute.py,domain_dic.txt(暴力破解的字典)
crt网站(https://crt.sh/)查询子域名文件：crt_check.py,crt.py
站长网站(http://tool.chinaz.com/subdomain/)查询子域名文件：chinaz_check.py,chinaz.py

主文件common.py：
封装了发送网络请求的方法和main主方法，并负责对扫描结果列表去重。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import urllib3
import requests
import user_agent_list
from config import *
from brute_check import *
from crt_check import crt_check
from chinaz_check import *# 网站子域名扫描器
# 当网站主站无从下手时，可以选择从c段或者同IP站点下手进行测试
# 举例：某个网站 www.test.com 的主站基本为静态页面，无从下手，但扫描后得知存在 bbs.test.com，就可以从论坛下手拿到管理权限。
# 方式：暴力破解(子域名字典爆破)，搜索引擎查询，页面爬取，第三方查询 https://crt.sh/（发送请求，分析返回内容，整理数据，展现），
# xxx.com/crossdomain.xml 文件，通过 HTTPS 证书搜集（调用对方 api 接口获取信息，展现），如 https://censys.io/
# 程序设计：utils 目录下存放不同的查询方式脚本。common.py 为主程序文件，发送 requests 网络请求，判断返回信息。config.py 为全局配置文件，方便后期修改配置，包括访问超时时间，随机的 user_agent 头。
# 百度搜索子域名 site:xxx.com,或者xxx.cn
# Webmaster-tools subdomain lookup: http://tool.chinaz.com/subdomain/
# common.py wraps the GET and POST network request helpers.
# HTTPS sites are requested, so urllib3's disable_warnings() is used to
# silence the HTTPS warnings and keep the console output readable.
requests.packages.urllib3.disable_warnings()

# Merged, de-duplicated list of subdomains; main() appends to it.
rr = []


def http_requests_get(url, allow_redirects=False):
    """Send a GET request and hand the response back only when it is HTTP 200.

    Args:
        url: Address to request.
        allow_redirects: Accepted for interface compatibility only. As in the
            original code it is NOT forwarded to ``requests.get``.
            NOTE(review): forwarding the default ``False`` would stop
            following redirects and change scan results - decide deliberately.

    Returns:
        The ``requests`` response when the status code is 200. For any other
        status, or when the request fails, an empty
        ``requests.models.Response()`` is returned, so callers can always read
        ``.status_code`` / ``.text`` without hitting ``None``.
    """
    try:
        result = requests.get(
            url=url,
            # A random User-Agent header dict is picked for every request.
            headers=user_agent_list.get_user_agent(),
            # Fixed timeout; the ``timeout`` name from config is not used here.
            timeout=8,
            verify=allow_ssl_verify,
        )
    except requests.RequestException:
        return requests.models.Response()
    if result.status_code == 200:
        return result
    # Previously this path fell through and returned None.
    return requests.models.Response()


def http_requests_post(url, payload, allow_redirects=allow_redirects):
    """Send a POST request with ``payload`` as the form data.

    Args:
        url: Address to request.
        payload: Data passed as the ``data`` argument of the request.
        allow_redirects: Whether redirects are followed; the default is the
            ``allow_redirects`` name already in scope when this function is
            defined.

    Returns:
        The ``requests`` response, or an empty ``requests.models.Response()``
        when the request fails.
    """
    try:
        # The original called requests.get() here, so no POST was ever sent.
        return requests.post(
            url=url,
            data=payload,
            headers=headers,
            timeout=timeout,
            allow_redirects=allow_redirects,
            verify=allow_ssl_verify,
        )
    except requests.RequestException:
        return requests.models.Response()


# Check whether the given address is a complete domain name
# Compiled once at import time. The raw string keeps the ``\.`` escapes
# literal instead of relying on an invalid string escape.
_DOMAIN_REGEX = re.compile(
    r'[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+\.?',
    re.IGNORECASE,
)


def is_domain(domain):
    """Return True when ``domain`` starts with a dotted domain name.

    The pattern is applied with ``match``, so it is anchored at the start of
    the string only; trailing characters after a valid name are accepted.

    Args:
        domain: String to check.

    Returns:
        ``True`` or ``False``.
    """
    return _DOMAIN_REGEX.match(domain) is not None


def main(domin):
    """Run the three lookups for ``domin`` and print the merged result.

    Calls ``crt_check``, ``brute_check`` and ``chinazz_check`` in that order,
    prints each partial result, and appends every name not yet present to the
    module-level ``rr`` list.

    Args:
        domin: Domain to scan, e.g. ``'5nd.com'``.

    Returns:
        The module-level ``rr`` list (the original returned nothing).
    """
    # Query subdomains through the crt website
    print('正在使用crt网站查询子域名:' + domin)
    rr1 = crt_check(domin)
    print(rr1)

    # Query subdomains by brute force
    print('\n')
    print('正在使用暴力破解域名:' + domin)
    rr2 = brute_check(domin)
    # Carriage return so the list does not print over the progress line
    print('\r')
    print(rr2)

    # Query subdomains through the chinaz webmaster website
    print('\n')
    print('正在使用站长中国网站查询子域名:' + domin)
    rr3 = chinazz_check(domin)
    print(rr3)

    # Merge the three lists, keeping first-seen order and dropping duplicates
    for partial in (rr1, rr2, rr3):
        for name in partial:
            if name not in rr:
                rr.append(name)

    print('\n')
    print(domin + '网站的子域名最终查询结果:')
    print(rr)
    return rr


if __name__ == '__main__':
    main('5nd.com')

全局配置文件config.py:
全局配置文件，主要提供网络请求、多线程等各项参数的配置，以及随机User-Agent头、X-Forwarded-For（XFF）头等信息。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 全局配置信息
import sys
import random

# Whether to verify the HTTPS server's certificate
allow_ssl_verify = False

# --------------------------------------------------
# requests settings
# --------------------------------------------------

# Timeout
timeout = 1

# Number of threads
thread_count = 10

# Whether URL redirects are allowed
allow_redirects = True

# Whether to allow the Session support of the HTTP request class, which keeps
# cookies across all requests that are sent
allow_http_session = True

# Whether a random User-Agent header is allowed
allow_random_useragent = False

# Whether a random X-Forwarded-For is allowed
allow_random_x_forward = False

# Random User-Agent headers
# Each entry is a one-key ``{'User-Agent': ...}`` dict.
USER_AGENTS = [
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
]


def random_useragent(condition=False):
    """Return one entry of ``USER_AGENTS``.

    Args:
        condition: When true a random entry is chosen; when false (the
            default) the first entry is always used.

    Returns:
        A ``{'User-Agent': ...}`` dict taken from ``USER_AGENTS`` (the same
        object that is stored in the list, not a copy).
    """
    if condition:
        return random.choice(USER_AGENTS)
    return USER_AGENTS[0]


# Random X-Forwarded-For (dynamic IP): 8.8.8.8 is used by default; when enabled
# a random IP is returned
def random_x_forwarded_for(condition=False):
    """Return an IPv4 address string for an X-Forwarded-For header.

    Args:
        condition: When true a random address is generated, each of the four
            parts drawn with ``random.randint(1, 254)``; when false (the
            default) the fixed address ``'8.8.8.8'`` is returned.

    Returns:
        The address as a dotted string.
    """
    if condition:
        return '%d.%d.%d.%d' % (
            random.randint(1, 254),
            random.randint(1, 254),
            random.randint(1, 254),
            random.randint(1, 254),
        )
    return '8.8.8.8'


# HTTP headers settings
# Default HTTP request headers, built once when this module is imported.
headers = {
    # random_useragent() returns a {'User-Agent': ...} dict, so take the string
    # out of it; the original stored the whole dict as the header value.
    'User-Agent': random_useragent(allow_random_useragent)['User-Agent'],
    # The HTTP header name is 'X-Forwarded-For'; the original key
    # 'X_FORWARDED_FOR' is a different header name.
    'X-Forwarded-For': random_x_forwarded_for(allow_random_x_forward),
    'Referer': 'http://www.baidu.com',
    'Cookie': "",
}

随机取user_agent头文件user_agent_list.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import random

# Candidate request headers; each entry is a one-key ``User-Agent`` dict.
_USER_AGENT_HEADERS = (
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
)


# Get one random User-Agent header
def get_user_agent():
    """Return a randomly chosen ``{'User-Agent': ...}`` headers dict.

    A fresh dict is returned on every call (as the original did by rebuilding
    its list each time), so callers may modify it without affecting later
    calls.
    """
    return dict(random.choice(_USER_AGENT_HEADERS))

暴力破解方式文件：brute_check.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from utils.brute import *

# Module-level names kept from the original file. brute_check() does not use
# sub_domain_old; sub_domain mirrors the result of the most recent call.
sub_domain_old = []
sub_domain = []


# This function loads the dictionary list, joins each entry into a url, sends
# the network requests from multiple threads, treats a 200 status code as proof
# that the url exists, and finally returns the list of existing subdomains.
# Example: sub_domain = brute_check('csdn.net')
def brute_check(domain):
    """Brute-force the subdomains of ``domain`` and return the unique hits.

    Args:
        domain: Base domain, e.g. ``'csdn.net'``.

    Returns:
        A new list with the entries of ``Brute(domain).result`` after
        ``run()``, duplicates removed and first-seen order kept.
    """
    bb = Brute(domain)
    bb.run()
    # dict.fromkeys drops duplicates while preserving insertion order.
    unique = list(dict.fromkeys(bb.result))
    # Replace rather than append: the original kept appending to the
    # module-level list, so a second call also returned the first call's hits.
    sub_domain[:] = unique
    return unique


# For debugging
# y = brute_check('csdn.net')
# print('\n')
# for i in y:
#     print(i)

暴力破解方式文件：brute.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import threading
from queue import Empty, Queue

from config import *
from common import http_requests_get, is_domain

# Brute force
# Brute force carried out with multiple threads
class Brute(object):
    """Brute-force subdomains of one domain from a dictionary file.

    Attributes are set in ``__init__``: ``domain`` (base domain), ``queue``
    (candidate host names), ``thread_count`` (number of worker threads) and
    ``result`` (host names that answered with status code 200).
    """

    def __init__(self, domain):
        self.domain = domain
        # Queue without an upper size limit
        self.queue = Queue()
        # Number of threads, read from the ``thread_count`` name in scope
        self.thread_count = thread_count
        # Collected results
        self.result = []

    def run(self):
        """Fill the queue from the dictionary, then run the worker threads.

        Every non-blank line of ``dict/domain_dic.txt`` is joined with the
        domain as ``<line>.<domain>`` and queued. ``thread_count`` workers are
        started and joined, so ``self.result`` is complete on return.
        """
        with open('dict/domain_dic.txt', encoding='utf-8') as f:
            for line in f:
                prefix = line.strip()
                # A blank line would otherwise queue the bogus host '.<domain>'.
                if prefix:
                    self.queue.put(prefix + '.' + self.domain)

        total = self.queue.qsize()
        threads = [
            self.BruteThread(self.queue, self.result, total)
            for _ in range(self.thread_count)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

    class BruteThread(threading.Thread):
        """Worker that takes host names off the shared queue and probes them."""

        def __init__(self, queue, result, total):
            threading.Thread.__init__(self)
            self._queue = queue
            self.result = result
            self.total = total

        def run(self):
            """Probe queued hosts until the queue is empty."""
            while True:
                # Take directly and stop on Empty. Checking empty() first and
                # then calling get_nowait() lets another thread take the last
                # item in between, which raised queue.Empty out of the thread.
                try:
                    sub = self._queue.get_nowait()
                except Empty:
                    break
                # ``sub`` is a dictionary entry joined with the domain, e.g.
                # flatfish.5nd.com; send a request to see whether it is valid.
                try:
                    self.msg()
                    result = http_requests_get('http://' + sub)
                    # Guard against a helper that returns None on failure.
                    if result is not None and result.status_code == 200:
                        self.result.append(sub)
                except Exception as e:
                    # Best effort: one failing host must not stop the worker.
                    print(e)

        def msg(self):
            """Rewrite the single-line progress indicator on stdout."""
            remaining = self._queue.qsize()
            done_count = float(self.total - remaining)
            all_count = float(self.total)
            found_count = len(self.result)
            # Avoid dividing by zero when the thread was created with total 0.
            percent = (done_count / all_count) * 100 if all_count else 100.0
            msg = '[-]Last {} | Complete {:.2f}% | Found {}'.format(
                remaining, percent, found_count)
            sys.stdout.write('\r' + msg)
            sys.stdout.flush()

暴力破解方式文件：domain_dic.txt(暴力破解的字典)

admin
bbs
blog
download
news
edu
passport
feed
articles
www
m
so
fawn
woodb
spiderb
spiderc
spidera
spendlessshopmore
sonja
vani
woods
afmpc1
afmpc3
woody
kalmar
topaliatzidiko
canes
whitebear
chromakode
gac
oxds08
bnislaam
rickmac
computertabloid
petral
lechenievennarodnymisredstvami
wirelessguest
bratislava
homedesigning
mrpvax
spider1
broward
s306
guertin
ilesxi
wednesday
fotopanass
cmfdev
elgar
artigos
s307
i-heart-baking
thrace
bmacneil
bundlelagi
targi
bral
gorman
sweet-as-sugar-cookies
audvis
hazeltine-gw

crt网站(https://crt.sh/)查询子域名文件：crt_check.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from utils.crt import *
from bs4 import BeautifulSoup as bs
import re

# This function queries subdomains through crt.sh and returns the list of
# subdomains found; just pass the domain in, e.g.
# sub_domain = crt_check('csdn.net')

# Module-level names kept from the original file; both mirror the most recent
# call (raw matches and de-duplicated matches respectively).
sub_domain_old = []
sub_domain = []


def crt_check(domain):
    """Look up subdomains of ``domain`` in the page returned by ``Crt``.

    Every ``<tr>`` that mentions the domain is scanned; the text of each
    attribute-less ``<td>...</td>`` cell is kept when it contains the domain
    and passes ``is_domain1``.

    Args:
        domain: Base domain, e.g. ``'csdn.net'``.

    Returns:
        A new list of the matches, duplicates removed, first-seen order kept.
        Empty when ``Crt(domain).run()`` returned nothing.
    """
    found = []
    page = Crt(domain).run()
    # run() may give back None/'' when the request failed; parsing None raises.
    if page:
        soup = bs(page, 'html.parser')
        for row in soup.find_all('tr'):
            if domain not in str(row):
                continue
            for cell in row.find_all('td'):
                for text in re.findall('<td>(.*?)</td>', str(cell)):
                    if domain in text and is_domain1(text):
                        found.append(text)

    # Replace instead of append so results of earlier calls do not leak in.
    sub_domain_old[:] = found
    # De-duplicate the list, keeping order
    sub_domain[:] = dict.fromkeys(found)
    return list(sub_domain)


# Check whether it is a domain name
# Compiled once at import time. The raw string keeps the ``\.`` escapes
# literal instead of relying on an invalid string escape.
_DOMAIN1_REGEX = re.compile(
    r'[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+\.?',
    re.IGNORECASE,
)


def is_domain1(domain):
    """Return True when ``domain`` starts with a dotted domain name.

    The pattern is applied with ``match``: anchored at the start only, so text
    following a valid name is accepted, while a leading ``*`` or ``-`` is not.

    Args:
        domain: String to check.

    Returns:
        ``True`` or ``False``.
    """
    return _DOMAIN1_REGEX.match(domain) is not None


# rr = crt_check('csdn.net')
# print(123)

crt网站(https://crt.sh/)查询子域名文件：crt.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import common


# https://crt.sh/
# GET request, e.g. https://crt.sh/?q=csdn.net
class Crt(object):
    """Fetch the crt.sh result page for one domain."""

    def __init__(self, domain):
        self.domain = domain
        # '%25.' is the URL-encoded '%.' prefix placed before the domain
        self.site = 'https://crt.sh/?q=%25.'
        # Stores the results produced
        self.result = []

    def run(self):
        """Request the crt.sh page for ``self.domain``.

        Returns:
            The response text, which is also appended to ``self.result``.
            An empty string when the request yields no response or raises;
            the original returned ``None`` implicitly in that case.
        """
        url = self.site + self.domain
        try:
            r = common.http_requests_get(url=url)
            if r is None:
                return ''
            text = r.text
        except Exception as e:
            # Best effort: report the problem and let the caller see "no data".
            print(e)
            return ''
        self.result.append(text)
        return text

站长网站(http://tool.chinaz.com/subdomain/)查询子域名文件：chinaz_check.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 站长助手-子域名查询     http://tool.chinaz.com/subdomain/
from utils.chinaz import Chinaz
from bs4 import BeautifulSoup as bs
import re

# Module-level names kept from the original file; both mirror the most recent
# call (raw matches and de-duplicated matches respectively).
sub_domain_old = []
sub_domain = []


# First request the page once to learn how many pages there are, then loop and
# append the results to the list.
# This function queries subdomains through tool.chinaz.com/subdomain/ and
# returns the list found; just pass the domain in, e.g.
# sub_domain = chinazz_check('csdn.net')
def chinazz_check(domain):
    """Collect the subdomains chinaz lists for ``domain`` across all pages.

    Args:
        domain: Base domain, e.g. ``'csdn.net'``.

    Returns:
        A new list of the names found, duplicates removed, first-seen order
        kept.
    """
    # The page count decides how many requests have to be sent.
    page = Chinaz(domain).get_page()

    found = []
    for p in range(1, page + 1):
        # get_text() hands back the Chinaz object holding the page body.
        fetched = Chinaz(domain).get_text(p)
        # It may hand back None when the request failed; skip that page
        # instead of raising AttributeError on ``.result``.
        if fetched is None:
            continue
        soup = bs(fetched.result, 'html.parser')
        for block in soup.find_all('div', attrs={'class': 'w23-0 subdomain'}):
            for candidate in re.findall('domain=(.*?)"', str(block)):
                if is_domain2(candidate):
                    found.append(candidate)

    # Replace instead of append so results of earlier calls do not leak in.
    sub_domain_old[:] = found
    # De-duplicate the list, keeping order
    sub_domain[:] = dict.fromkeys(found)
    return list(sub_domain)


# Check whether it is a domain name
# Compiled once at import time. The raw string keeps the ``\.`` escapes
# literal instead of relying on an invalid string escape.
_DOMAIN2_REGEX = re.compile(
    r'[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+\.?',
    re.IGNORECASE,
)


def is_domain2(domain):
    """Return True when ``domain`` starts with a dotted domain name.

    The pattern is applied with ``match``: anchored at the start only, so text
    following a valid name is accepted.

    Args:
        domain: String to check.

    Returns:
        ``True`` or ``False``.
    """
    return _DOMAIN2_REGEX.match(domain) is not None


# rr = chinazz_check('bilibili.com')
# for r in rr:
#     print(r)

站长网站(http://tool.chinaz.com/subdomain/)查询子域名文件：chinaz.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import common
from common import *


# Webmaster-tools subdomain lookup: http://tool.chinaz.com/subdomain/
class Chinaz(object):
    """Fetch subdomain result pages from tool.chinaz.com for one domain."""

    # Results per page; the original hard-coded 20 in get_page().
    PAGE_SIZE = 20

    def __init__(self, domain):
        self.domain = domain
        self.site = 'http://tool.chinaz.com/subdomain?domain='
        # Number of subdomains the site reports for this domain
        self.num = 0
        # Body of the most recently fetched page
        self.result = ''

    def get_text(self, page_number):
        """Fetch one result page and store its body in ``self.result``.

        Args:
            page_number: 1-based page index appended as ``&page=``.

        Returns:
            ``self``, so the caller can read ``.result``. On failure the
            problem is printed, ``self.result`` is set to ``''`` and ``self``
            is still returned; the original returned ``None`` implicitly.
        """
        url = self.site + self.domain + '&page=' + str(page_number)
        try:
            r = common.http_requests_get(url=url)
            self.result = r.text
        except Exception as e:
            # Best effort: report and continue with an empty page.
            print(e)
            self.result = ''
        return self

    def get_page(self):
        """Return how many result pages have to be requested.

        Requests page 1, reads the hit count from the first
        ``-blue01">...</strong>`` match and stores it in ``self.num``. When
        that fails, the problem is printed and ``self.num`` keeps its value.

        Returns:
            ``ceil(self.num / PAGE_SIZE)``, but never less than 1. The
            original ``int(num / 20) + 1`` asked for one extra page whenever
            the count was an exact multiple of 20.
        """
        url = self.site + self.domain + '&page=1'
        try:
            r = common.http_requests_get(url=url)
            self.result = r.text
            h = re.findall('-blue01">(.*?)</strong>', self.result)
            # Store the reported number of subdomains on the object
            self.num = int(h[0])
        except Exception as e:
            print(e)
        pages = (self.num + self.PAGE_SIZE - 1) // self.PAGE_SIZE
        return max(1, pages)

执行结果：