1.项目目录

----Project

------venv

--------main.py

--------brickseek.py

--------database.py

2.main.py

import brickseek

# Walmart SKUs whose per-store inventory is crawled, one browser session each.
SKU = ["675353130", "543873356", "113247244", "259271016", "618763356"]

if __name__ == '__main__':
    # SKUs are processed sequentially; brickseek.setZip handles its own
    # per-ZIP failures, so one bad ZIP code does not stop the run.
    for sku in SKU:
        brickseek.setZip(sku)

3.brickseek.py

from pymysql.converters import escape_string
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from database import Database
import time
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions


def exlog(sku, zip):
    """Append a failed (sku, zip) crawl attempt to ``errorLog.txt``.

    Logging is best-effort: a failure to write the log file is reported on
    stdout but never propagated, so it cannot abort the crawl.

    Args:
        sku: SKU that was being crawled.
        zip: ZIP code that was being queried. The name shadows the builtin
            but is kept so existing callers keep working.
    """
    try:
        msg = "爬取失败 sku=" + str(sku) + "  zip=" + str(zip) + "\n"
        # ``with`` guarantees the handle is closed even if the write fails.
        with open("errorLog.txt", 'a', encoding='utf-8') as f:
            f.write(msg)
    except Exception as e:
        print("写入错误日志失败,Error:", e)


def _sql_quote(value):
    """Return *value* as an escaped, single-quoted SQL string literal."""
    return "'" + escape_string(str(value)) + "'"


def saveData(id, store, price, quantity, sku):
    """Insert or update one store's inventory row in the ``brickseek`` table.

    A row is looked up by ``id``; if it exists it is updated, otherwise a new
    row is inserted. The current local time is stored in the ``time`` column.
    Any error is caught and printed, so a bad row never aborts the crawl.

    Args:
        id: Store identifier scraped from the page (text after ``#``).
        store: Store cell text.
        price: Price cell text.
        quantity: Normalised quantity string.
        sku: SKU the row belongs to.
    """
    print("保存数据到mysql")
    try:
        db = Database()
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # Every value comes from a scraped page, so all of them (not only
        # store/price) are escaped and quoted before being put into SQL.
        id_sql = _sql_quote(id)
        sku_sql = _sql_quote(sku)
        store_sql = _sql_quote(store)
        quantity_sql = _sql_quote(quantity)
        price_sql = _sql_quote(price)
        time_sql = "str_to_date(" + _sql_quote(now) + ",'%Y-%m-%d %H:%i:%s')"

        if db.queryOne("select * from brickseek where id=" + id_sql):
            print("更新")
            sql = (
                f"UPDATE brickseek SET sku = {sku_sql},store={store_sql},"
                f"quantity={quantity_sql},price={price_sql},time={time_sql} "
                f"WHERE id = {id_sql}"
            )
        else:
            print("插入")
            sql = (
                "insert into brickseek(id,sku,store,quantity,price,time) "
                f"values({id_sql}, {sku_sql}, {store_sql}, {quantity_sql}, "
                f"{price_sql}, {time_sql})"
            )
        db.save(sql)
        print("保存数据成功")
    except Exception as e:
        print("保存数据失败,Error:", e)


def getPage(d, zip, sku):
    """Search the open inventory-checker page for *zip* and save each result.

    Types the ZIP code into the form, submits it, waits up to 30 seconds for
    the result page marker, then parses every table row and passes it to
    ``saveData``.

    Args:
        d: Selenium WebDriver with the inventory-checker page already loaded.
        zip: ZIP code to search around (name kept for caller compatibility).
        sku: SKU being checked; stored with every row.

    Raises:
        Exception: Selenium lookup/timeout errors and ``IndexError`` from
            unexpected row markup propagate to the caller.
    """
    zip_input = d.find_element(By.ID, 'inventory-checker-form-zip')
    # The same browser page is reused for many ZIP codes; clear the field so
    # a value left over from a previous search is not prepended to this one.
    zip_input.clear()
    zip_input.send_keys(zip)
    print("输入zip...")

    submit_button = d.find_element(By.ID, 'main').find_element(By.CLASS_NAME, 'bs-button')
    submit_button.submit()
    print("正在请求zip网页数据...")

    # Presence of this container is used as the "results rendered" signal.
    WebDriverWait(d, 30).until(
        EC.presence_of_element_located((By.ID, "BrickseekVideoAdContainer"))
    )

    print("解析数据...")
    table_body = d.find_element(By.ID, 'main').find_element(By.CLASS_NAME, 'table__body')
    rows = table_body.find_elements(By.CLASS_NAME, 'table__row')
    print("数量=", len(rows))

    for row in rows:
        # The store id is the text after '#' in the location name.
        store_id = row.find_element(By.CLASS_NAME, 'address-location-name').text.split("#")[1]
        cells = row.find_elements(By.CLASS_NAME, 'table__cell-content')
        store = cells[0].text
        price = cells[2].text
        status = cells[1].find_element(By.CLASS_NAME, 'availability-status-indicator__text').text

        if status == "In Stock":
            # Only the last character of the quantity text is kept.
            # NOTE(review): a multi-digit count would be truncated - confirm
            # the site never shows more than one digit before "6+".
            quantity = cells[1].find_element(By.CLASS_NAME, 'table__cell-quantity').text[-1]
            if quantity == "+":
                quantity = "6+"
        elif status == "Out of Stock":
            quantity = "0"
        else:
            # Any other status is stored as "2" - presumably a
            # limited-stock placeholder; TODO confirm.
            quantity = "2"

        saveData(store_id, store, price, quantity, sku)


def setZip(sku):
    """Crawl Brickseek's Walmart inventory checker for *sku* over 50 random ZIPs.

    Opens a Chrome session on the checker page for the SKU, then 50 times
    picks a random row from the ``zip_code`` table and runs ``getPage`` for
    its first column. A failing ZIP is printed, logged through ``exlog`` and
    skipped. The browser is always closed when the function exits.

    Args:
        sku: SKU string appended to the checker URL.
    """
    url = "https://brickseek.com/walmart-inventory-checker/?sku=" + sku
    d = webdriver.Chrome()
    try:
        # Implicit wait of up to 30 seconds; setting it once is enough.
        d.implicitly_wait(30)
        d.set_window_size(480, 800)
        # Anti-bot measure: hide navigator.webdriver from page scripts.
        d.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"""
        })
        print("正在打开网页...")
        d.get(url)
        print("正在获取网页数据...")
        for _ in range(50):
            zip_code = Database().queryOne("select * from zip_code order by rand() limit 1")[0]
            try:
                print("url=", url, "zip=", zip_code)
                getPage(d, zip_code, sku)
                print("爬取成功 sku=", sku, "zip=", zip_code)
            except Exception as e:
                print("爬取失败 sku=", sku, "zip=", zip_code, "Error:", e)
                exlog(sku, zip_code)
    finally:
        # Without this every SKU leaves a Chrome/chromedriver process behind.
        d.quit()

4.database.py


import MySQLdb
class Database(object):
    """Small helper around one MySQL connection shared by all instances.

    The connection is stored on the class and opened by the first
    instantiation; later instantiations reuse it. Query methods accept an
    optional ``params`` sequence/dict that is handed to the driver's
    ``cursor.execute`` so values can be bound by the driver instead of being
    formatted into the SQL text by the caller.
    """

    # Shared connection; None until the first Database() is created and
    # again after closeDB().
    connection = None

    def __init__(self):
        """Open the shared connection if none is currently open."""
        if not Database.connection:
            Database.connection = MySQLdb.connect(
                host="xxx", user="xxx", passwd="xxx",
                database="reptile", charset='utf8')
            print("get connection")

    def queryOne(self, sql, params=None):
        """Execute *sql* and return the first result row (or None).

        Args:
            sql: SQL text to execute.
            params: Optional values bound by the driver; None sends *sql*
                as-is.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
            rows = cursor.fetchone()
        finally:
            cursor.close()
        print(rows)
        return rows

    def queryAll(self, sql, params=None):
        """Execute *sql* and return all result rows.

        Args:
            sql: SQL text to execute.
            params: Optional values bound by the driver; None sends *sql*
                as-is.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
            rows = cursor.fetchall()
        finally:
            cursor.close()
        print(rows)
        return rows

    def save(self, sql, params=None):
        """Execute a data-modifying statement and commit it.

        Args:
            sql: SQL text to execute.
            params: Optional values bound by the driver; None sends *sql*
                as-is.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
        finally:
            cursor.close()
        Database.connection.commit()

    def closeDB(self):
        """Close the shared connection (if any) and forget it."""
        if Database.connection is not None:
            try:
                # Actually release the server connection instead of only
                # dropping the reference.
                Database.connection.close()
            finally:
                Database.connection = None
        print("close db ...")

【Python】爬取国外购物网站商品信息实战相关推荐

用Python爬取淘宝网商品信息
用Python爬取淘宝网商品信息转载请注明出处网购时经常会用到淘宝网点我去淘宝但淘宝网上的商品琳琅满目,于是我参照中国大学 MOOC的代码写了一个爬取淘宝网商品信息的程序代码如下: impor ...
手把手教你用python爬取人人贷网站借款人信息
P2P是近年来很热的一个行业,由于这个行业在国内兴起才不久,国内的很多学者对这个行业都兴趣盎然,在大学研究互联网金融的学者更是有一大群.小编是学金融出身,深知数据在做学术研究的重要性,之前有不少学互联 ...
python爬虫爬取当当网的商品信息
python爬虫爬取当当网的商品信息一.环境搭建二.简介三.当当网网页分析 1.分析网页的url规律 2.解析网页html页面书籍商品html页面解析其他商品html页面解析四.代码实现 ...
python使用requests库爬取淘宝指定商品信息
python使用requests库爬取淘宝指定商品信息在搜索栏中输入商品通过F12开发者工具抓包我们知道了商品信息的API,同时发现了商品数据都以json字符串的形式存储在返回的html内解析u ...
Scrapy爬取当当网的商品信息存到MySQL数据库
Scrapy爬取当当网的商品信息存到MySQL数据库 Scrapy 是一款十分强大的爬虫框架,能够快速简单地爬取网页,存到你想要的位置.经过两天的摸索,终于搞定了一个小任务,将当当网的商品信息爬下来存 ...
python爬取2019年计算机就业_2019年最新Python爬取腾讯招聘网信息代码解析
原标题:2019年最新Python爬取腾讯招聘网信息代码解析前言初学Python的小伙们在入门的时候感觉这门语言有很大的难度,但是他的用处是非常广泛的,在这里将通过实例代码讲解如何通过Python ...
python爬取58同城的兼职信息
标题python爬取58同城的兼职信息刚刚开始学习爬虫,一些自己研究的小东西,爬取58同城上面的兼职信息放入Excel表格里,具体代码解释在代码里给出注释,下面给出完整代码: #首先是导包 imp ...
用python爬取豆瓣影评及影片信息(评论时间、用户ID、评论内容)
爬虫入门:python爬取豆瓣影评及影片信息:影片评分.评论时间.用户ID.评论内容思路分析元素定位完整代码豆瓣网作为比较官方的电影评价网站,有很多对新上映影片的评价,不多说,直接进入正题. ...
python爬取抖音用户数据_使用python爬取抖音视频列表信息
如果看到特别感兴趣的抖音vlogger的视频,想全部dump下来,如何操作呢?下面介绍介绍如何使用python导出特定用户所有视频信息抓包分析 Chrome Deveploer Tools Chro ...

【Python】爬取国外购物网站商品信息实战

1.项目目录

2.main.py

3.brickseek.py

4.database.py

【Python】爬取国外购物网站商品信息实战相关推荐

最新文章

热门文章