• Check whether a file or directory exists
if not os.path.exists(rootdir):
  • Create a directory
os.mkdir(rootdir)
  • Run a system command
os.system(cmd)
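The three snippets above often appear together; a minimal sketch (rootdir and cmd are placeholder names, and os.makedirs with exist_ok=True is used so nested paths also work):
import os

rootdir = '/tmp/workdir'            # placeholder path
cmd = 'ls -l ' + rootdir            # placeholder command

os.makedirs(rootdir, exist_ok=True) # create the directory (and any parents) if missing
os.system(cmd)                      # run the command through the shell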
  • Loop over a dictionary
for key, value in my_dict.items():
    print(key, value)
  • Open a file and process its contents line by line
fd = open('xxxx.txt', encoding='utf-8')
for line in fd:
    print(line)
fd.close()

  • Create a file and append content
fd = open('xxxx.txt', 'a+', encoding='utf-8')
fd.write('aaaaa' + '\n')
fd.close()

  • Read an Excel file with xlrd
Import the module
import xlrd
Open the workbook
data = xlrd.open_workbook('demo.xls')  # note: open_workbook is all lowercase here
List the sheet names in the file
data.sheet_names()
Get the first worksheet, either by position, by index, or by name
table = data.sheets()[0]
table = data.sheet_by_index(0)
table = data.sheet_by_name(u'Sheet1')
Get the number of rows and columns
nrows = table.nrows
ncols = table.ncols
Get the values of an entire row or column (as a list)
table.row_values(i)
table.col_values(i)
Loop over the rows by index
for rownum in range(table.nrows):
    print(table.row_values(rownum))
Read a single cell (cell takes row index, then column index)
cell_A1 = table.cell(0, 0).value
cell_C4 = table.cell(3, 2).value
Or index the row and column separately
cell_A1 = table.row(0)[0].value
cell_A2 = table.col(0)[1].value
Simple in-memory writes with put_cell
row = 0
col = 0
ctype = 1  # cell type: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
value = 'lixiaoluo'
xf = 0  # extended formatting (default is 0)
table.put_cell(row, col, ctype, value, xf)
table.cell(0, 0)        # text: u'lixiaoluo'
table.cell(0, 0).value  # 'lixiaoluo'

  • Write an Excel file with xlwt
Import the module
import xlwt
Create a new workbook
workbook = xlwt.Workbook()  # note: Workbook is capitalized here, unlike xlrd's open_workbook
Add a sheet
table = workbook.add_sheet('sheet name')
Write data with table.write(row, col, value)
table.write(0, 0, 'test')
Writing the same cell twice raises an exception:
# Exception: Attempt to overwrite cell:
# sheetname=u'sheet 1' rowx=0 colx=0
Pass cell_overwrite_ok=True when adding the sheet to allow overwriting
table = workbook.add_sheet('sheet name', cell_overwrite_ok=True)
Save the file
workbook.save('demo.xls')
Styles can also be applied
style = xlwt.XFStyle()  # initialize a style
font = xlwt.Font()      # create a font for the style
font.name = 'Times New Roman'
font.bold = True
style.font = font       # attach the font to the style
table.write(0, 0, 'some bold Times text', style)  # write with the style

  • Command-line options with getopt
import sys
import getopt

try:
    options, args = getopt.getopt(sys.argv[1:], "hp:i:", ["help", "ip=", "port="])
except getopt.GetoptError:
    sys.exit()
for name, value in options:
    if name in ("-h", "--help"):
        usage()  # usage() is assumed to be defined elsewhere
    if name in ("-i", "--ip"):
        print(value)
    if name in ("-p", "--port"):
        print(value)
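From a shell, the snippet above would be driven roughly like this (the script name and values are hypothetical):
python myscript.py --ip 127.0.0.1 --port 8080
python myscript.py -h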

  • A simple crawler
import requests

AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
HEADERS = {
    'User-Agent': AGENT,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': '*/*'
}
session = requests.session()

# Simulate a login
def login():
    postdata = {
        'defaults': 'xxx',
        'fromLogin': 'xxx',
        'userName': 'xxx',
        'password': 'xxxx'
    }
    url = 'xxxxxxxx'
    login_info = session.post(url, headers=HEADERS, data=postdata, verify=False)
    if login_info.status_code == requests.codes.ok:
        print('login success')
        return True
    else:
        print('login err')
        return False

# Download an HTML page
def downloadUrl(rootdir, url, orgid, page):
    html = session.get(url, headers=HEADERS, verify=False)
    if html.text[1:7] == 'script':
        print(html.text)
        return "err"
    if len(html.text) < 60:
        return "err"
    sample = open(rootdir + "/" + str(orgid) + '_' + str(page) + ".html", "w", encoding='utf-8')
    sample.write(html.text)
    sample.close()
    return 'ok'
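A rough usage sketch for the two functions above (the directory, URL and arguments are placeholders, not values from the original post):
if login():
    downloadUrl('F:/test', 'http://example.com/list?page=1', orgid=1001, page=1)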

  • Parse the contents of a JSON file
import json

def scrapy_by_row(row):
    try:
        orgid = row['organization']['id']
        familyid = row['censusRegisterFamily']['id']
    except:
        print('errrr')
        return

def scrapy_by_file(json_file_name):
    # Read the contents of the JSON file
    text = open(json_file_name, encoding='utf-8').read()
    # Special handling: strip the BOM character that files from Windows may carry
    if text.startswith(u'\ufeff'):
        text = text.encode('utf8')[3:].decode('utf8')
    # Convert the JSON text into a Python object
    try:
        json_data = json.loads(text)
    except:
        print(json_file_name)
        return
    for row in json_data['rows']:
        scrapy_by_row(row)
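The parser above expects a file shaped roughly like this hypothetical minimal example (only the keys the code actually reads are shown):
{
    "rows": [
        {"organization": {"id": 1}, "censusRegisterFamily": {"id": 100}},
        {"organization": {"id": 2}, "censusRegisterFamily": {"id": 101}}
    ]
}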

  • Walk a directory tree
import os

# Walk the directory rootdir and call dirFunc on every matching file
def walkThroughDir(rootdir, dirFunc):
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            print(filename)
            # Only process files with the html extension
            if filename.split('.')[-1] == 'html':
                dirFunc(os.path.join(parent, filename))
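For example (the directory path and handler are placeholders), printing the full path of every matching file:
def handle(path):
    print('processing', path)

walkThroughDir('F:/test', handle)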

  • Scrape basic listings from the Wenzhou housing site (0577home.net)
# -*- coding: utf-8 -*-
import re
import requests
import time
#----------------------------- Regular-expression constants used for parsing ------------------------------------------
# Number of matching records (used to compute the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Estate name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Estate price
PRICE = 'class="hot_price">(.*?)</span>'
# Estate address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Output directory
ROOTDIR = 'F:\\test\\'
#----------------------------- Request headers that mimic a browser; without them the scraper gets blocked ------------
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}
#----------------------------- Fetch one page of listings; pageNo is the page number -----------------------------------
def getHouseListByPageno(pageNo):
    # Open a session for the request
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # Open the output file in write mode
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    # Write the listing page to the file
    fh.write(houseList.text)
    # Close the file
    fh.close()
#------------------------------- Work out how many pages need to be fetched --------------------------------------------
def getPageNum():
    # Open the already-downloaded first page of listings
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    # Read the file contents
    rawContent = f.read()
    # Parse the page with the regular expression
    pageNum = re.findall(PAGE_NUM, rawContent)
    # 20 records per page; return the number of pages
    return int(pageNum[0]) // 20 + 1
def parseHouseListToFile(srcFile, dstFile):
    # Open the file to be parsed
    f = open(srcFile, encoding='utf-8')
    # Read the file contents
    rawContent = f.read()
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    f.close()
    i = 0
    for x in names:
        # Use '$' as the field separator and end each record with a newline
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '\n')
        i = i + 1
#------------------------------- Main: download and parse the listings -------------------------------------------------
if __name__ == '__main__':
    #--------------------- Fetch the pages -----------------------------
    # Fetch the first page of listings
    getHouseListByPageno(1)
    # Use the first page to work out how many pages to fetch in total
    pageNum = getPageNum()
    # Fetch the remaining pages
    for i in range(2, int(pageNum) + 1):
        getHouseListByPageno(str(i))
    #--------------------- Parse the pages -----------------------------
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    # Create the output file, prefixed with the date
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # Parse every downloaded page
    #for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    # Close the file
    f.close()

  • Scrape detailed info from the Wenzhou housing site (0577home.net)
# -*- coding: utf-8 -*-
import re
import requests
import time
import os
#----------------------------- Regular-expression constants used for parsing ------------------------------------------
# Number of matching records (used to compute the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Estate name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Estate price
PRICE = 'class="hot_price">(.*?)</span>'
# Estate address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Estate id
ID = 'class="picdiv_left"><ahref="http://www.0577home.net/xiaoqu/(.*?).html'
# District the estate belongs to
LOCATION = '<div><a>所属区域:</a><span>(.*?)</span></div>'
# Land area
AREA = '<div><a>占地面积:</a><span>(.*?)</span></div>'
# Greening rate
GREENINGRATE = '<div><a>绿化率:</a><span>(.*?)</span></div>'
# Number of buildings
LAYER = '<div><a>楼总数:</a><span>(.*?)</span></div>'
# Property type
TYPE = '<div><a>物业类型:</a><span>(.*?)</span></div>'
# Attached primary school
PRIMARYSCHOOL = '<div><a>所属小学:</a><span>(.*?)</span></div>'
# Total building area
BUILDINGAREA = '<div><a>总建筑面积:</a><span>(.*?)</span></div>'
# Plot ratio
PLOTRATIO = '<div><a>容积率:</a><span>(.*?)</span></div>'
# Developer
DEVEPLOPER = '<div><a>开发商:</a><span>(.*?)</span></div>'
# Output directory
ROOTDIR = 'F:\\test\\'
#----------------------------- Request headers that mimic a browser; without them the scraper gets blocked ------------
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}
#----------------------------- Fetch one page of listings; pageNo is the page number -----------------------------------
def getHouseListByPageno(pageNo):
    # Open a session for the request
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # Open the output file in write mode
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    # Write the listing page to the file
    fh.write(houseList.text)
    # Close the file
    fh.close()
def getHouseInfoByPageno(pageNo, k):
    # Skip detail pages that have already been downloaded
    if os.path.exists(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html"):
        return
    print('downloading !, count %s, page %s' % (str(k), str(pageNo)))
    # Open a session for the request
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/detail_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # Open the output file in write mode
    fh = open(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html", 'w', encoding='utf-8')
    # Write the detail page to the file
    fh.write(houseList.text)
    # Close the file
    fh.close()
#------------------------------- Work out how many pages need to be fetched --------------------------------------------
def getPageNum():
    # Open the already-downloaded first page of listings
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    # Read the file contents
    rawContent = f.read()
    # Parse the page with the regular expression
    pageNum = re.findall(PAGE_NUM, rawContent)
    # 20 records per page; return the number of pages
    return int(pageNum[0]) // 20 + 1
def parseHouseInfo(srcFile):
    # Open the file to be parsed
    f = open(srcFile, encoding='utf-8')
    # Read the file contents
    content = f.read()
    # p = re.compile('\s+')
    # content = re.sub(p, '', rawContent)
    location = re.findall(LOCATION, content)[0]
    location = location.split(' ')
    category1 = location[0]
    category2 = location[1]
    area = re.findall(AREA, content)[0]
    greeningrate = re.findall(GREENINGRATE, content)[0]
    layer = re.findall(LAYER, content)[0]
    type = re.findall(TYPE, content)[0]
    primaryschool = re.findall(PRIMARYSCHOOL, content)[0]
    buildingarea = re.findall(BUILDINGAREA, content)[0]
    plotratio = re.findall(PLOTRATIO, content)[0]
    developer = re.findall(DEVEPLOPER, content)[0]
    f.close()
    return (category1, category2, area, greeningrate, layer, type, primaryschool, buildingarea, plotratio, developer)
def parseHouseListToFile(srcFile, dstFile):
    # Open the file to be parsed
    f = open(srcFile, encoding='utf-8')
    # Read the file contents
    rawContent = f.read()
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    ids = re.findall(ID, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    i = 0
    f.close()
    for x in names:
        # Use '$' as the field separator and end each record with a newline
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '$' + ids[i] + '\n')
        i = i + 1
#------------------------------- Main: download and parse the listings -------------------------------------------------
if __name__ == '__main__':
    #--------------------- Fetch the pages -----------------------------
    # Fetch the first page of listings
    # getHouseListByPageno(1)
    # # Use the first page to work out how many pages to fetch in total
    # pageNum = getPageNum()
    # # Fetch the remaining pages
    # for i in range(2, int(pageNum) + 1):
    #    getHouseListByPageno(str(i))
    #--------------------- Parse the pages -----------------------------
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    # Create the output file, prefixed with the date
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # Parse every downloaded listing page
    #for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    # Close the file
    f.close()
    f = open(ROOTDIR + localtime + '_houseList.txt', encoding='utf-8')
    fd = open(ROOTDIR + localtime + '_houseInfo.txt', 'w', encoding='utf-8')
    k = 0
    for line in f:
        data = line.strip('\n')
        data = data.split('$')
        idx = data[3]
        getHouseInfoByPageno(idx, k)
        houseInfo = parseHouseInfo(ROOTDIR + "houseInfo_pageNo" + str(idx) + ".html")
        print(str(k) + "$".join(data) + '$' + "$".join(houseInfo))
        fd.write("$".join(data) + '$' + "$".join(houseInfo) + '\n')
        k += 1
    f.close()
    fd.close()

  • Read a CSV file
import csv

with open('job.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

  • Write a CSV file
import csv
import os

# Create the CSV file and write the header row if the file does not exist yet
def createCsv(file):
    if not os.path.exists(file):
        csvfile = open(file, 'a+', encoding='utf-8', newline='')
        writer = csv.writer(csvfile)
        writer.writerow(paramname)  # paramname: header row, defined elsewhere
    else:
        csvfile = open(file, 'a+', newline='')
        writer = csv.writer(csvfile)
    return writer
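A possible usage sketch (paramname and the row values are hypothetical; note the function keeps the file handle open behind the returned writer):
paramname = ['name', 'price', 'address']
writer = createCsv('houses.csv')
writer.writerow(['Some Estate', '12000', 'Wenzhou'])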

  • Call Java from Python with jpype
import sys
import jpype
name = sys.argv[1]
jarpath = '/home/dsadm/why/python'
jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.ext.dirs=%s" % jarpath)
DECRYPT = jpype.JClass('why.fmrt.decrypt.DECRYPT')
upperName = DECRYPT.decrypt(name)
print(upperName)
jpype.shutdownJVM()

  • Simple CAPTCHA cracking
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import subprocess
import requests
from PIL import Image
from PIL import ImageOps

def cleanImage(imagePath):
    image = Image.open(imagePath)
    image = image.point(lambda x: 0 if x < 143 else 255)
    borderImage = ImageOps.expand(image, border=20, fill='white')
    borderImage.save(imagePath)

html = urlopen("http://www.pythonscraping.com/humans-only")
bsObj = BeautifulSoup(html, "html.parser")
# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]
captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")
# Run tesseract OCR over the cleaned image
p = subprocess.Popen(["tesseract", "captcha.jpg", "captcha"],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.wait()
f = open("captcha.txt", "r")
# Clean any whitespace characters
captchaResponse = f.read().replace(" ", "").replace("\n", "")
print("Captcha solution attempt: " + captchaResponse)
if len(captchaResponse) == 5:
    params = {"captcha_token": captchaToken, "captcha_sid": captchaSid,
              "form_id": "comment_node_page_form", "form_build_id": formBuildId,
              "captcha_response": captchaResponse, "name": "Ryan Mitchell",
              "subject": "I come to seek the Grail",
              "comment_body[und][0][value]":
                  "...and I am definitely not a bot"}
    r = requests.post("http://www.pythonscraping.com/comment/reply/10",
                      data=params)
    responseObj = BeautifulSoup(r.text, "html.parser")
    if responseObj.find("div", {"class": "messages"}) is not None:
        print(responseObj.find("div", {"class": "messages"}).get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")

  • Slider CAPTCHA cracking
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import PIL.Image as image
import time, re, random
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
# Browser headers for the requests made by the script
agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
headers = {
    'User-Agent': agent
}
# Reassemble the image from its pieces according to their positions
# filename: image file
# location_list: positions of the pieces
# The two PIL calls used below:
# crop takes a (left, upper, right, lower) box describing the piece to cut out
# paste takes the image to paste and the (x offset, y offset) where it should go
def get_merge_image(filename, location_list):
    # Open the image file
    im = image.open(filename)
    # Create a new 260x116 image
    new_im = image.new('RGB', (260, 116))
    im_list_upper = []
    im_list_down = []
    # Copy the pieces
    for location in location_list:
        # Pieces belonging to the upper half
        if location['y'] == -58:
            im_list_upper.append(im.crop((abs(location['x']), 58, abs(location['x']) + 10, 166)))
        # Pieces belonging to the lower half
        if location['y'] == 0:
            im_list_down.append(im.crop((abs(location['x']), 0, abs(location['x']) + 10, 58)))
    new_im = image.new('RGB', (260, 116))
    x_offset = 0
    # Paste the pieces back together
    for im in im_list_upper:
        new_im.paste(im, (x_offset, 0))
        x_offset += im.size[0]
    x_offset = 0
    for im in im_list_down:
        new_im.paste(im, (x_offset, 58))
        x_offset += im.size[0]
    return new_im
# Download and reassemble the image
# driver: webdriver
# div: the div that holds the image pieces
def get_image(driver, div):
    # Find the divs that hold the image pieces
    background_images = driver.find_elements_by_xpath(div)
    location_list = []
    imageurl = ''
    # The image is scrambled with CSS offsets; collect those offsets so the image can be reassembled later
    for background_image in background_images:
        location = {}
        # Parse the small-image URL and the x/y offsets out of the inline style
        location['x'] = int(re.findall("background-image: url\(\"(.*)\"\); background-position: (.*)px (.*)px;", background_image.get_attribute('style'))[0][1])
        location['y'] = int(re.findall("background-image: url\(\"(.*)\"\); background-position: (.*)px (.*)px;", background_image.get_attribute('style'))[0][2])
        imageurl = re.findall("background-image: url\(\"(.*)\"\); background-position: (.*)px (.*)px;", background_image.get_attribute('style'))[0][0]
        location_list.append(location)
    # Swap the file extension to get the full-size image URL
    imageurl = imageurl.replace("webp", "jpg")
    # Image file name
    imageName = imageurl.split('/')[-1]
    # Fetch the image
    session = requests.session()
    r = session.get(imageurl, headers=headers, verify=False)
    # Save the image to disk
    with open(imageName, 'wb') as f:
        f.write(r.content)
    # Reassemble the image
    image = get_merge_image(imageName, location_list)
    return image
# Compare the RGB values of two images at one pixel
def is_similar(image1, image2, x, y):
    # Get the RGB value at the given position in both images
    pixel1 = image1.getpixel((x, y))
    pixel2 = image2.getpixel((x, y))
    for i in range(0, 3):
        # If any channel differs by 50 or more, treat the pixel as part of the gap
        if abs(pixel1[i] - pixel2[i]) >= 50:
            return False
    return True
# Work out the position of the gap
def get_diff_location(image1, image2):
    i = 0
    # Both source images are 260x116
    # Compare the RGB value of every pixel with two nested loops
    # A difference of 50 or more marks the gap position
    for i in range(0, 260):
        for j in range(0, 116):
            if is_similar(image1, image2, i, j) == False:
                return i
# Build the x-axis movement track from the gap position
def get_track(length):
    list = []
    # Each step is a random move of one to three pixels
    x = random.randint(1, 3)
    # Generate the track and store it in the list
    while length - x >= 5:
        list.append(x)
        length = length - x
        x = random.randint(1, 3)
    # The last five pixels are moved one at a time
    for i in range(length):
        list.append(1)
    return list
# Slider CAPTCHA cracking routine
def main():
    # Start Firefox
    driver = webdriver.Firefox()
    # Open the demo page in Firefox
    driver.get("http://www.geetest.com/exp_embed")
    # Wait for the page elements to appear
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed())
    # Download the two images
    image1 = get_image(driver, "//div[@class='gt_cut_bg gt_show']/div")
    image2 = get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div")
    # Work out the gap position
    loc = get_diff_location(image1, image2)
    # Generate the x-axis movement track
    track_list = get_track(loc)
    # Find the slider knob
    element = driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
    location = element.location
    # y position of the slider knob
    y = location['y']
    # Click the element and hold the mouse button down
    print("Step 1: click and hold the slider")
    ActionChains(driver).click_and_hold(on_element=element).perform()
    time.sleep(0.15)
    print("Step 2: drag the slider")
    track_string = ""
    for track in track_list:
        # Do not move too fast, otherwise the move is detected as automated
        track_string = track_string + "{%d,%d}," % (track, y - 445)
        # xoffset=track+22: the offset is relative to the knob's top-left corner, while the track values refer
        # to its centre, so half the knob width (22) is added.
        # yoffset=y-445: same idea; browsers render differently, so make sure the final value works out to 22,
        # i.e. half the knob height.
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=y - 445).perform()
        # The pause between moves is also randomized; moving too fast is detected as automated
        time.sleep(random.randint(10, 50) / 100)
    print(track_string)
    # xoffset=21 essentially steps back one pixel; this is repeated five times because the knob starts
    # five pixels away from the left edge of the slider track
    ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
    time.sleep(0.1)
    ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
    time.sleep(0.1)
    ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
    time.sleep(0.1)
    ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
    time.sleep(0.1)
    ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
    print("Step 3: release the mouse")
    # Release the mouse button
    ActionChains(driver).release(on_element=element).perform()
    time.sleep(3)
    # Click verify
    # submit = driver.find_element_by_xpath("//div[@class='gt_ajax_tip success']")
    # print(submit.location)
    # time.sleep(5)
    # Close the browser; commented out so the result stays visible for the demo
    #driver.quit()
# Main entry point
if __name__ == '__main__':
    main()

  • Build a web page with Python (tornado)
import os
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
from view import *
from tornado.options import define, options

define("port", default=8000, help="run on the given port", type=int)

class Application(tornado.web.Application):
    def __init__(self):
        handlers = [
            (r"/", Indexhandler),
        ]
        settings = dict(
            template_path=os.path.join(os.path.dirname(__file__), 'templates'),
            autoescape=None,
            debug=False,
        )
        tornado.web.Application.__init__(self, handlers, **settings)

if __name__ == "__main__":
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application(), xheaders=True)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
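Indexhandler comes from the author's view module, which is not shown; a minimal sketch of what such a handler might look like (the template name is hypothetical):
import tornado.web

class Indexhandler(tornado.web.RequestHandler):
    def get(self):
        # Render a template from the templates directory configured above
        self.render('index.html')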

  • Scheduled (recurring) tasks
#! /usr/bin/env python
# coding=utf-8
import time, os, sched

# The first argument is the clock used to time events (seconds since the epoch)
# The second argument is the function used to wait
schedule = sched.scheduler(time.time, time.sleep)

def perform_command(cmd, inc):
    # Schedule this function to run again in inc seconds, making the task periodic
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    os.system(cmd)

def timming_exe(cmd, inc=60):
    # enter schedules an event to fire inc seconds from now
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    # Keep running until the event queue is empty
    schedule.run()

# Run getMovieList.py once a day
timming_exe("python getMovieList.py", 60 * 60 * 24)

  • Normalize addresses with the Baidu Maps API
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.error import URLError
import json

class xBaiduMap:
    def __init__(self, key='mgf2Gxr7EgnfPVQnpClZnsug'):
        self.host = 'http://api.map.baidu.com'
        self.path = '/geocoder?'
        self.param = {'address': None, 'output': 'json', 'key': key, 'location': None, 'city': None}
    def getLocation(self, address, city=None):
        rlt = self.geocoding('address', address, city)
        if rlt is not None:
            l = rlt['result']
            if isinstance(l, list):
                return None
            return l['location']['lat'], l['location']['lng']
    def getAddress(self, lat, lng):
        rlt = self.geocoding('location', "{0},{1}".format(lat, lng))
        if rlt is not None:
            l = rlt['result']
            #return l['formatted_address']
            # Here you can get more details about the location with 'addressComponent' key
            ld = rlt['result']['addressComponent']
            return (ld['city'] + ';' + ld['district'] + ';' + ld['street'] + ";" + ld['street_number'])
    def geocoding(self, key, value, city=None):
        if key == 'location':
            if 'city' in self.param:
                del self.param['city']
            if 'address' in self.param:
                del self.param['address']
        elif key == 'address':
            if 'location' in self.param:
                del self.param['location']
            if city == None and 'city' in self.param:
                del self.param['city']
            else:
                self.param['city'] = city
        self.param[key] = value
        try:
            r = urlopen(self.host + self.path + urlencode(self.param)).read()
        except URLError:
            print("URLError")
            return None
        str_response = r.decode('utf-8')
        rlt = json.loads(str_response)
        if rlt['status'] == 'OK':
            return rlt
        else:
            print("Decoding Failed")
            return None
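A small usage sketch (the address string is a made-up example; the actual result depends on the API key and the service response):
bm = xBaiduMap()
print(bm.getLocation('some address string'))  # -> (lat, lng), or None on failure
print(bm.getAddress(28.0, 120.7))             # -> 'city;district;street;street_number'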

  • Multiprocessing
import multiprocessing

jobs = []
for process_id in range(PROCESS_NUM):
    p = multiprocessing.Process(target=worker, args=(process_id,))
    jobs.append(p)
    p.start()
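PROCESS_NUM and worker are assumed to be defined elsewhere in the original script; a self-contained sketch under that assumption might look like this:
import multiprocessing

PROCESS_NUM = 4

def worker(process_id):
    # Placeholder work for each process
    print('process %d running' % process_id)

if __name__ == '__main__':   # guard required on Windows
    jobs = []
    for process_id in range(PROCESS_NUM):
        p = multiprocessing.Process(target=worker, args=(process_id,))
        jobs.append(p)
        p.start()
    for p in jobs:
        p.join()             # wait for all workers to finish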

  • A small file-splitting utility
import os

def split_file(file_name, file_num):
    # Already split: the first output file exists
    if os.path.exists("split_0.txt"):
        return
    # Count the total number of lines in the input file
    count = -1
    file = open(file_name, encoding='utf-8')
    for count, line in enumerate(file):
        pass
    count += 1
    file.close()
    # Number of lines per output file
    count_per_file = count / file_num
    # Create file_num new (empty) files
    for i in range(file_num):
        file = open("split_" + str(i) + ".txt", 'w', encoding='utf-8')
        file.close()
    # Distribute the input lines across the file_num new files
    file = open(file_name, encoding='utf-8')
    count = -1
    for count, line in enumerate(file):
        file_index = int(count / count_per_file)
        sub_file = open("split_" + str(file_index) + ".txt", "a+", encoding='utf-8')
        sub_file.write(line)
        sub_file.close()
    file.close()
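For example, splitting a hypothetical big.txt into four roughly equal parts (split_0.txt through split_3.txt):
split_file('big.txt', 4)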

  • Working with DB2 from Python
import ibm_db

con = ibm_db.connect("DATABASE=FMRT;HOSTNAME=XX.XX.XX.XX;PORT=60000;PROTOCOL=TCPIP;UID=db2inst1;PWD=db2inst1;", "", "")
sql = getSql(inputfile)  # getSql and inputfile are defined elsewhere in the author's script
stmt = ibm_db.exec_immediate(con, sql)
result = ibm_db.fetch_both(stmt)
rowidx = 0
while result:
    # DO SOMETHING with the current row
    result = ibm_db.fetch_both(stmt)
ibm_db.close(con)

  • Chinese word segmentation with jieba
import jieba

seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
# jieba.cut returns a generator, so it can only be iterated once
for word in seg_list:
    print(word)
# Cut again before joining, because the generator above is now exhausted
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list))  # full mode
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))  # accurate mode
seg_list = jieba.cut("他来到了网易杭研大厦")  # accurate mode is the default
print(", ".join(seg_list))
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")  # search-engine mode
print(", ".join(seg_list))

  • Month-end check
import calendar
import sys

def isMonthEnd(datetime):
    year = int(datetime[0:4])
    month = int(datetime[4:6])
    day = int(datetime[6:8])
    # monthrange returns (weekday of the first day, number of days in the month)
    wday, monthrange = calendar.monthrange(year, month)
    if day == monthrange:
        return 1
    else:
        return 0

print(isMonthEnd(sys.argv[1]))
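For example, called directly with a YYYYMMDD string:
print(isMonthEnd('20240229'))  # 1 -- 2024 is a leap year, so February has 29 days
print(isMonthEnd('20240228'))  # 0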

  • Remove stray line separators (CR/LF) with sed
# oldfile and newfile are paths defined elsewhere
cmd = "sed ':a;N;$ s/\\r\\n//g;ba' " + oldfile + " > " + newfile
os.system(cmd)

  • Multithreading
# -*- coding: utf-8 -*-
"""
thread
~~~~~~~~~~~~~~~~
Thread framework
:copyright: (c) 2016 by why.
:license: MIT, see LICENSE for more details.
"""
import threading

class Threadconfig():
    def __init__(self, thread_size):
        self.thread_size = thread_size
    def topen(self):
        self.thread_tasks = []
    def build(self, func, **kwargs):
        self.thread_task = threading.Thread(target=func, kwargs=(kwargs))
        self.thread_tasks.append(self.thread_task)
    def run(self):
        for thread_task in self.thread_tasks:
            thread_task.daemon = True
            thread_task.start()
        # Busy-wait until every thread has finished
        while 1:
            alive = False
            for thread_num in range(0, self.thread_size):
                alive = alive or self.thread_tasks[thread_num].is_alive()
            if not alive:
                break
    def __del__(self):
        self.thread_tasks = []
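A usage sketch for the class above (the task function and its arguments are made up for illustration; thread_size should match the number of build calls):
import time

def task(name=None, delay=1):
    time.sleep(delay)
    print('task', name, 'done')

cfg = Threadconfig(thread_size=2)
cfg.topen()                         # reset the task list
cfg.build(task, name='a', delay=1)
cfg.build(task, name='b', delay=2)
cfg.run()                           # starts both threads and waits for them to finish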

  • Install a wheel with pip
pip install *.whl
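For instance, with a hypothetical downloaded wheel file:
pip install somepkg-1.0.0-py3-none-any.whl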

Reposted from: https://www.cnblogs.com/kernel521/p/6855359.html
