SRE从踩坑到牛逼(二)利用Python进行Arcgis站点分析+Nginx日志分析
前面是日记,或者是牢骚吧
就感觉挺可惜的,一月份的时候因为分析日志的需求,写过一个几十行的代码,能够做到Arcgis站点分析,并且录入到Excel,根据Arcgis站点信息进行日志分析的部分还没写,这次需要用的时候就找不到了,于是重新写了代码,实现了 Arcgis站点分析+Nginx日志分析+录入到Excel 的功能。写完以后没几天又找到了旧代码。
颇有“知不可乎骤得,托遗响于悲风”的感伤
所以这一次先紧急贴代码并脱敏,会周末花两三天时间完成这个博客——所以现在就是一个半成品博客
旧代码记录
import json
import os
import urllib.error
import urllib.request
from asyncio.windows_events import NULL

import pandas
import tablib
# import jieba

# ArcGIS site names to scan; each becomes /<site>/arcgis/rest/services.
arc_srvs = {
    'a_map_1', 'a_map_2', 'a_map_3', 'a_map_4', 'a_map_5', 'a_map_6',
    # 'e_map_1',
    # 'f_map_1',
    'b_map_1', 'b_map_2', 'c_map_1', 'd_map_1', 'a_map_xzq',
    'g_map_1', 'g_map_2', 'g_map_3', 'b_map_dghy',
}

# Rows for the Excel sheet: [server, service name, service url, hit count].
xls_data = []
header = ('server', 'name', 'url', 'count')


def get_record(url):
    """Fetch *url* and return its decoded JSON body, or None on failure.

    BUG FIX: the original imported ``NULL`` (which is just 0) from
    ``asyncio.windows_events`` and used it as a null sentinel, with a bare
    ``except:`` that also swallowed KeyboardInterrupt; use ``None`` and
    catch only network/OS errors.
    """
    try:
        resp = urllib.request.urlopen(url)
    except (urllib.error.URLError, OSError):
        print("FAIL: ", url)
        return None
    else:
        return json.loads(resp.read())


def get_services(arc_services, arc_url, arc_srv):
    """Append one row per service to ``xls_data`` with the counter at 0.

    ``arc_services`` is the ``services`` list from an ArcGIS REST listing;
    each entry is a dict with at least a ``name`` key.
    """
    for arc_service in arc_services:
        xls_data.append([
            arc_srv,
            arc_service['name'],
            arc_url + '/' + arc_service['name'],
            0,
        ])
        # for log analyse
        # jieba.add_word(arc_url + '/' + arc_service['name'])


def get_folder_services(arc_folders, arc_url, arc_srv):
    """Recursively collect services from every ArcGIS folder listing."""
    for arc_folder in arc_folders:
        arc_fd_json = get_record('http://此处已省去敏感url信息' + arc_url + '/' + arc_folder + '此处已省去敏感后缀')
        if arc_fd_json is None:
            print("INFO: FD NULL")
            continue
        arc_fd_services = arc_fd_json['services']
        arc_fd_folders = arc_fd_json['folders']
        if arc_fd_folders:
            get_folder_services(arc_fd_folders, arc_url, arc_srv)
        get_services(arc_fd_services, arc_url, arc_srv)


if __name__ == '__main__':
    # --- analyse arcgis services ---
    for arc_srv in arc_srvs:
        # Site URL pattern: http://<entry>/<site>/arcgis/rest/services
        arc_url = '/' + arc_srv + '/arcgis/rest/services'
        arc_json = get_record('http://此处已省去敏感url信息' + arc_url + '此处已省去敏感后缀')
        if arc_json is None:
            continue
        arc_folders = arc_json['folders']
        arc_services = arc_json['services']
        get_services(arc_services, arc_url, arc_srv)
        get_folder_services(arc_folders, arc_url, arc_srv)

    # --- analyse nginx log file: count one hit per matching request URL ---
    with open("F:\\nginx-log\\access.log") as logfin:
        for line in logfin:
            arr = line.split(' ')
            # arr[6] is the request path field of the combined log format.
            for xlsd in xls_data:
                if xlsd[2] in arr[6]:
                    xlsd[3] += 1

    # --- save data to an xls file ---
    # BUG FIX: the original opened the *output* file into a handle named
    # ``fin`` and never closed it; use a context manager.
    dataset = tablib.Dataset(*xls_data, headers=header)
    with open('data.xls', 'wb') as fout:
        fout.write(dataset.xls)
相关的敏感信息已经经过了脱敏处理。
仅仅做记录,逻辑还是很清楚的,我的缩进和命名也很规范,应该一看就明白。
新的代码思路
新的代码是面向对象的思想写的,目录结构为
│ config.py
│ run.py
│
├─app
│ analyse_log.py
│ check_exists.py
│ get_sites.py
│ save_to_excel.py
│
├─db
│ site.csv
│
├─excel
│ 站点信息.xlsx
│
└─test_log
       host.access.log
config.py为全局配置
from pathlib import Path

# Project root: the directory that contains this config file.
BASE_PATH = Path(__file__).parent

# Name of the generated Excel workbook.
EXCEL_NAME = "站点信息.xlsx"
# Directory the workbook is written into, and its full path.
EXCEL_PATH = BASE_PATH / "excel"
MY_EXCEL_PATH = EXCEL_PATH / EXCEL_NAME

# CSV describing the ArcGIS sites to analyse.
SITE_PATH = BASE_PATH / "db" / "site.csv"

# Nginx access log to scan.
# NGINX_LOG_PATH = "/home/nginx/logs/host.access.log"
NGINX_LOG_PATH = BASE_PATH / "test_log" / "host.access.log"

# Ensure the Excel output directory exists before anything writes to it.
EXCEL_PATH.mkdir(exist_ok=True, parents=True)
run.py为项目入口
from config import NGINX_LOG_PATH, SITE_PATH
from app.get_sites import Sitemsg
from app.analyse_log import AnalyseLog, ReadLog
from app.check_exists import check_exists
from app.save_to_excel import SaveFile
from pathlib import Pathif __name__ == '__main__':if not check_exists:exit(-1)stmsg = Sitemsg(SITE_PATH)stmsg.get_site_service()AnalyseLog(NGINX_LOG_PATH, stmsg)# stmsg.print_site_service()SaveFile(stmsg)ReadLog(NGINX_LOG_PATH, 10).show()
analyse_log.py
from app.get_sites import Sitemsgclass AnalyseLog(object):def __init__(self,log_filepath,sitemsg_obj):self.filepath = log_filepathself.sitemsg = sitemsg_objself.analyse()def analyse(self):with open(self.filepath, mode='r', encoding='utf-8') as log_file_obj:for line in log_file_obj:for site in self.sitemsg.siteobjs:"""class Sitesself.name = nameself.url = urlself.isnetsite = isnetsiteself.services = []"""name = site.nameservices = site.servicesif not name in line:continuefor service in services:if service in line:site.add_services_count(service)class ReadLog(object):def __init__(self,log_filepath,count):self.line = countself.filepath = log_filepathdef show(self):with open(self.filepath, mode='r', encoding='utf-8') as log_file_obj:for line in log_file_obj:print(line)if not self.line:breakself.line -= 1
check_exists.py
from config import NGINX_LOG_PATH, SITE_PATH
from pathlib import Pathdef check_exists():if not Path(SITE_PATH).exists():return 0if not Path(NGINX_LOG_PATH).exists():return 0return 1
get_sites.py
from config import SITE_PATH
import requests
import json


class Sites(object):
    """One ArcGIS site: its name, entry URL, and discovered services."""

    def __init__(self, name, url, *, isnetsite=False):
        # Site name (first CSV column).
        self.name = name
        # REST services entry URL (third CSV column).
        self.url = url
        # True when the CSV marks the site as internet-facing ("互联网").
        self.isnetsite = isnetsite
        # service name -> {"parent": site name, "count": hit counter}
        self.services = {}

    def append_services(self, service, *, count=0):
        """Register *service* with an initial hit count.

        NOTE(review): a duplicate service name silently overwrites the
        previous entry — the commented-out guard below shows this was a
        known concern; confirm duplicates cannot occur across folders.
        """
        # if service in self.services:
        #     print("[ERROR] service {} is exists.{}. now parents is {}".format(service, self.services[service], self.name))
        #     print(self.services)
        #     exit(2)
        self.services[service] = {"parent": self.name, "count": count}

    def add_services_count(self, service, *, count=1):
        """Increment the hit counter of *service* (KeyError if unknown)."""
        self.services[service]["count"] += count
        # print(self.services[service], self.services[service]["count"])

    def print_services(self):
        """Dump every service and its counter to stdout (debug helper)."""
        for item in self.services:
            print(item, self.services[item])


class Sitemsg(object):
    """Loads the site CSV and drives service discovery for each site."""

    def __init__(self, filepath):
        # Path to the site CSV; must support ``.open()`` (pathlib.Path).
        self.filepath = filepath
        # CSV header fields, filled by create().
        self.title = None
        # List of Sites objects, one per CSV data row.
        self.siteobjs = []
        self.create()

    def create(self):
        # Read the site list: one header line, then "name,network,url" rows.
        with self.filepath.open(mode='r', encoding="utf-8") as site_file_obj:
            self.title = site_file_obj.readline().strip().split(',')
            # print("[DEBUG] class Sitemsg self.title {}".format(self.title))
            for line in site_file_obj:
                name, isnetsite, url = line.strip().split(',')
                # "互联网" marks an internet-facing site.
                isnetsite = True if isnetsite == "互联网" else False
                # print("[DEBUG] class Sitemsg name={}, isnetsite={}, url={}".format(name, isnetsite, url))
                site = Sites(name, url, isnetsite=isnetsite)
                self.siteobjs.append(site)

    def get_site_service(self):
        # TODO: fetch the services of every site.
        for site in self.siteobjs:
            ReqGetService.get_service(site)

    def print_site_service(self):
        for site in self.siteobjs:
            site.print_services()

    def join(self):
        # TODO: assemble the site request URL.
        pass


class ReqGetService(object):
    """Queries the ArcGIS REST directory and records discovered services."""

    # Query-string suffix asking ArcGIS for pretty-printed JSON.
    __url_tail = "?f=pjson"

    @classmethod
    def get_service(cls, site):
        """Fetch *site*'s root services listing and record its services."""
        # TODO: dedicated request logic for internet-facing sites.
        # print("[DEBUG] class ReqGetService url: " + site.url + cls.__url_tail)
        res = requests.get(site.url + cls.__url_tail)
        # print("[DEBUG] class ReqGetService res.text:\n{}".format(res.text))
        """[DEBUG] class Sitemsg res.text{"currentVersion": 10.61,"folders": ["FOLDER1","FOLDER2","FOLDER3"],"services": []}"""
        data_dict = json.loads(res.text)  # {'currentVersion': 10.61, 'folders': ['FOLDER1', 'FOLDER2', 'FOLDER3'], 'services': []}
        # print("[DEBUG] class ReqGetService data_dict:\n{}".format(data_dict))
        for folder in data_dict['folders']:
            ReqGetService.get_folder_service(site, site.url, folder)
        for service in data_dict['services']:
            site.append_services(service['name'])

    @classmethod
    def get_folder_service(cls, site, url, folder):
        """Fetch one folder's listing, recursing into nested folders.

        NOTE(review): the recursive call passes the original *url*, so a
        nested folder is requested as ``url/<inner>`` rather than
        ``url/<outer>/<inner>`` — confirm against the server whether
        folder names returned here are already fully qualified.
        """
        # print("[DEBUG] class ReqGetService folder url: " + url + '/' + folder + cls.__url_tail)
        res = requests.get(url + '/' + folder + cls.__url_tail)
        data_dict = json.loads(res.text)  # {'currentVersion': 10.61, 'folders': [], 'services': [{'name': 'FOLDER1/SERVICE2020', 'type': 'MapServer'}, ...]}
        # print("[DEBUG] class ReqGetService folder_data_dict:\n{}".format(data_dict))
        for folder in data_dict['folders']:
            print("folder again!")
            ReqGetService.get_folder_service(site, url, folder)
        for service in data_dict['services']:
            site.append_services(service['name'])
save_to_excel.py
from openpyxl import workbook
from config import MY_EXCEL_PATH
from app.get_sites import Sitemsg
class SaveFile(object):def __init__(self,sitemsg_opt):self.wb = workbook.Workbook()# del self.wb['Sheet']self.save(sitemsg_opt)self.wb.save(MY_EXCEL_PATH)def save(self,sitemsg_opt):sheet = self.wb['Sheet']sheet.cell(1,1).value = "Site"sheet.cell(1,2).value = "Service"sheet.cell(1,3).value = "Count"row_index = 2siteobjs = sitemsg_opt.siteobjsfor site in siteobjs:services = site.servicesfor item in services:sheet.cell(row_index,1).value = services[item]["parent"]sheet.cell(row_index,2).value = itemsheet.cell(row_index,3).value = services[item]["count"]row_index += 1
site.csv
站点名,网络环境,访问地址
a_map_1,局域网,http://URL地址入口/a_map_1/arcgis/rest/services
a_map_2,局域网,http://URL地址入口/a_map_2/arcgis/rest/services
a_map_3,局域网,http://URL地址入口/a_map_3/arcgis/rest/services
SRE从踩坑到牛逼(二)利用Python进行Arcgis站点分析+Nginx日志分析相关推荐
- 就是这么牛逼,利用Python绘制炫酷专业插图就是这么简单
强烈推荐 Python 的绘图模块 matplotlib: python plotting .画出来的图真的是高端大气上档次,低调奢华有内涵~ 适用于从 2D 到 3D,从标量到矢量的各种绘图.能够保 ...
- EDUSOHO踩坑笔记之四十二:资讯
EDUSOHO踩坑笔记之四十二:资讯 获取资讯列表信息 GET /articles/{id} 权限 老API,需要认证 参数 字段 是否必填 描述 sort string 否 排序,'created' ...
- 推荐一个牛逼的生物信息 Python 库 - Dash Bio
翻译 | Lemonbit 来源 | Plotly 译文出品 | Python数据之道 推荐一个牛逼的生物信息 Python 库 - Dash Bio Dash Bio 是一个免费的开源 Python ...
- 大写牛逼,用 Python 登录主流 24 个网站
爬虫脚本是大家经常用到的,那就避开不了登录 这一关. 使用Python一般需要request库,补充 header 中的 post 要素,有些还会有 隐藏的 hidden 参数,可以通过浏览器 F12 ...
- OrangePi PC 玩Linux主线内核踩坑之旅(二)之制作镜像后的查遗补缺
一.查遗补缺之SSH连接到香橙派 上一篇我们讲到,为了可以使用SSH连接到香橙派,我们勾选了dropbear软件包,这是一个轻量级的SSH软件,包含了客户端和服务端[1].如何使用呢?在上一篇的末尾, ...
- TS的踩坑之路(二)
一.函数返回写法报错 代码案例 报错信息 Line 9:53: Unexpected block statement surrounding arrow body; parenthesize the ...
- Easytrader踩坑之旅(二)
快速阅读 用的是THSTrader进行的调试,同花须必须用8.0的.在新的机子重新安装requirements已经调用同花顺查股票余额. 继续昨天的话题.昨天到最后,虽然显示了余额,但是和自己帐户上面 ...
- 太牛逼了,Python和算法简直是绝配
名师 带你刷爆LeetCode 算法知识 讲解+训练 免费0元报名参加 在讲到 AI 算法工程师时,大部分同学关注点都在高大上的模型,一线优秀的项目.但大家往往忽略了一点,人工智能的模型.项目最终还是 ...
- 牛逼!这个 Python 时间格式转换工具太优秀了!
这是「进击的Coder」的第 493 篇技术分享 作者:Ckend 来源:Python 实用宝典 " 阅读本文大概需要 6 分钟. " DeLorean 是一个 Python 的第 ...
最新文章
- 一起学DHCP系列(五)指派、获取
- (第六场)Singing Contest 【模拟】
- CSS基础(part21)--CSS3伪元素选择器
- python 爬虫代码_python之路(一)_爬虫—爬一下网页代码输出到文件
- cmail服务器安装后无法登录的解决办法
- 百度成立互联网医院;钉钉招小学生产品体验师;iOS 13.4 上线 | 极客头条
- python解zuobiaoxi方程_从马尔可夫链到蒙特卡洛-Metropolis方法(Python)
- 使用开源PhoneGap开发web app
- jsp在校大学生助学贷款管理系统
- matlab编程绘制直方图,matlab绘制直方图程序
- OSEK OS(OSEK 操作系统)
- 基于隐私保护的安全多方计算区块链融合技术的智能合约
- 贪心算法-2.找钱问题
- SMTP 550错误
- php把数组作为函数参数传递,使用关联数组作为函数的参数《 PHP 基础 》
- php pandoc,Pandoc 标记语言转化工具
- 不知不觉,二哥 CSDN 博客访问量破 1000 万了,这个成绩,全网也没几个吧?
- Python实战 -- 利用Flask搭建微电影网站(一)蓝图构建
- 【李嘉诚】教五年内买车买房——理财篇
- RabbitMq初识(一)