Commit dbb32845 by sanshi

all

parent 8bff7ae5
# -*- coding: utf-8 -*-
# 爬虫测试
# 作者: 陈磊
# 时间: 2019-01-14
import requests
import json
import xlwt
import xlrd
from lxml import etree
class Bank(object):
    """Client for the paginated bank-branch listing on www.lianhanghao.com.

    One instance describes a (bank, province, city) query; ``getResponse``
    downloads a single result page and keeps both the response object and
    its decoded text on the instance.
    """

    def __init__(self, bank=None, province=None, city=None, key=None, timeout=30):
        """
        :param bank: bank ID placed in the listing URL
        :param province: province ID
        :param city: city ID
        :param key: search keyword (stored only; the page request does not use it)
        :param timeout: seconds to wait for the server before requests gives up
        """
        self.host = "http://www.lianhanghao.com/index.php"
        self.headers = {}
        self.url = self.host
        self.params = {}
        self.json = {}
        self.response = ""
        self.bank = bank
        self.province = province
        self.city = city
        self.key = key
        self.timeout = timeout
        self.text = ""

    def _page_url(self, x):
        """Build the URL of result page ``x`` for the current bank/province/city."""
        return "{}/Index/index/bank/{}/province/{}/city/{}/p/{}.html".format(
            self.url, self.bank, self.province, self.city, x
        )

    def getResponse(self, x):
        """Fetch result page ``x`` and store it in ``self.response`` / ``self.text``.

        :param x: page number
        :raises requests.exceptions.RequestException: on connection failure
            or when the server does not answer within ``self.timeout`` seconds
        """
        # A timeout is passed explicitly: without one, requests waits forever
        # on a stalled connection and the whole scrape hangs.
        self.response = requests.get(
            url=self._page_url(x),
            headers=self.headers,
            timeout=self.timeout,
        )
        self.text = self.response.text
class Ajax(object):
    """Client for the ``/Index/Ajax`` endpoint, queried with an ``id`` parameter."""

    def __init__(self, _id, timeout=30):
        """
        :param _id: province ID sent as the ``id`` query parameter
        :param timeout: seconds to wait for the server before requests gives up
        """
        self.host = "http://www.lianhanghao.com/index.php/Index/Ajax"
        self.headers = {}
        self.url = self.host
        self.params = {}
        self.json = {}
        self.response = ""
        self._id = _id
        self.timeout = timeout
        self.text = ""

    @staticmethod
    def _parse_payload(raw):
        """Decode a response body into Python data, or ``[]`` if it is not JSON.

        The first three characters of the body are discarded before decoding.
        NOTE(review): presumably a byte-order mark or similar prefix emitted
        by the server -- confirm against a live response.
        """
        try:
            return json.loads(raw[3:])
        except json.JSONDecodeError:
            return []

    def getResponse(self):
        """Query the endpoint and store the decoded result in ``self.text``.

        ``self.text`` ends up holding the decoded JSON value, or an empty
        list when the body could not be decoded.

        :raises requests.exceptions.RequestException: on connection failure
            or when the server does not answer within ``self.timeout`` seconds
        """
        self.params = {
            "id": self._id
        }
        # A timeout is passed explicitly: without one, requests waits forever
        # on a stalled connection.
        self.response = requests.get(
            url=self.url,
            params=self.params,
            headers=self.headers,
            timeout=self.timeout,
        )
        self.text = self._parse_payload(self.response.text)
# Extract bank rows from listing HTML with XPath.
def xpath_parse_banks(html):
    """Return ``[bank_id, bank_name]`` pairs from the first ten table rows.

    For each row index 1..10 the first and second ``td`` cells of the table
    at ``/html/body/div[3]/div[3]/table/tbody`` are read. Rows whose first
    cell is blank after stripping are skipped. If an ``AttributeError`` is
    raised while handling a row (for example a cell whose ``text`` is
    ``None``), the rest of that row is abandoned and processing moves on.

    :param html: HTML document text
    :return: list of two-element lists of stripped strings
    """
    tree = etree.HTML(html)
    row_prefix = "/html/body/div[3]/div[3]/table/tbody/tr["
    found = []
    for row_no in range(1, 11):
        id_path = row_prefix + str(row_no) + "]/td[1]"
        name_path = row_prefix + str(row_no) + "]/td[2]"
        try:
            id_cells = tree.xpath(id_path)
            name_cells = tree.xpath(name_path)
            for id_cell in id_cells:
                code = id_cell.text.strip()
                if code == "":
                    continue
                for name_cell in name_cells:
                    title = name_cell.text.strip()
                    found.append([code, title])
        except AttributeError:
            continue
    return found
class Main(object):
    """Drives the scrape and writes the results to ``.xls`` workbooks.

    ``save_ajax`` writes the province/city ID table to ``ajax.xls``;
    ``start`` reads that file back and collects the branch list of one bank
    for every city in it.
    """

    def __init__(self, name, num):
        """
        :param name: worksheet name; also the output file name (``name + ".xls"``)
        :param num: index into ``self.banks`` selecting the bank ID to query
        """
        self.banks = [1, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 69, 16, 17, 18, 23]
        self.provinces = ["北京市", "天津市", "河北省", "山西省", "内蒙古自治区", "辽宁省", "吉林省", "黑龙江省", "上海市", "江苏省", "浙江省",
                          "安徽省", "福建省", "江西省", "山东省", "河南省", "湖北省", "湖南省", "广东省", "广西壮族自治区", "海南省", "四川省",
                          "重庆市", "贵州省", "云南省", "西藏自治区", "陕西省", "甘肃省", "青海省", "宁夏回族自治区", "新疆维吾尔自治区",
                          "台湾", "香港", "澳门"]
        self.workbook = xlwt.Workbook(encoding="ascii")
        self.worksheet = self.workbook.add_sheet(name)
        self.file_name = name + ".xls"
        self.num = num

    @staticmethod
    def _as_id(value):
        """Turn an integral float such as ``1.0`` into ``1``; return anything else unchanged."""
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    def save_ajax(self):
        """Fetch the cities of every province and save the ID table to ``ajax.xls``.

        Each city becomes one row: province ID, city ID, province name,
        city name. Row 0 is left empty; data starts at row 1.
        """
        rows = 0
        # Province IDs are the 1-based positions in self.provinces.
        for province_id, province_name in enumerate(self.provinces, start=1):
            api = Ajax(_id=province_id)
            api.getResponse()
            cities = api.text
            print(cities)
            for city in cities:
                rows += 1
                self.worksheet.write(rows, 0, label=province_id)
                self.worksheet.write(rows, 1, label=city["id"])
                self.worksheet.write(rows, 2, label=province_name)
                self.worksheet.write(rows, 3, label=city["name"])
        # Persist the province/city ID table.
        self.workbook.save("ajax.xls")

    def start(self):
        """Scrape every result page of the selected bank for each city in ``ajax.xls``.

        Each branch found becomes one row: province name, city name, and the
        two values returned by ``xpath_parse_banks``. The workbook is saved
        to ``self.file_name`` once all cities have been processed.
        """
        rows = 0
        data = xlrd.open_workbook("ajax.xls")
        table = data.sheets()[0]
        bank_id = self.banks[self.num]
        # Row 0 of ajax.xls is empty (see save_ajax), so start at row 1.
        for x in range(1, table.nrows):
            # xlrd returns numeric cells as floats; without normalising, an ID
            # written as 1 would be put into the request URL as "1.0".
            province_id = self._as_id(table.cell(x, 0).value)
            city_id = self._as_id(table.cell(x, 1).value)
            province_name = table.cell(x, 2).value
            city_name = table.cell(x, 3).value
            for page in range(1, 999):
                # Progress output: which province/city is being queried.
                print(province_name, city_name)
                api = Bank(bank=bank_id, province=province_id, city=city_id)
                api.getResponse(x=page)
                banks = xpath_parse_banks(api.text)
                print(banks)
                if not banks:
                    # An empty page means there are no more results for this city.
                    break
                for bank in banks:
                    rows += 1
                    self.worksheet.write(rows, 0, label=province_name)
                    self.worksheet.write(rows, 1, label=city_name)
                    self.worksheet.write(rows, 2, label=bank[0])
                    self.worksheet.write(rows, 3, label=bank[1])
        self.workbook.save(self.file_name)
if __name__ == '__main__':
    # start() reads "ajax.xls", which is the file save_ajax() writes.
    # Main().start()
    # Main().save_ajax()
    # Completed runs:
    # Main(name="中国工商银行", num=0).start()
    # Main(name="中国农业银行", num=1).start()
    # Main(name="中国银行", num=2).start()
    # Main(name="中国建设银行", num=3).start()
    # Main(name="交通银行", num=4).start()
    # Main(name="中国邮政储蓄银行", num=5).start()
    # Main(name="中信银行", num=6).start()
    # Main(name="中国光大银行", num=7).start()
    # Main(name="华夏银行", num=8).start()
    # Main(name="中国民生银行", num=9).start()
    # Main(name="广东发展银行", num=10).start()
    # Main(name="平安银行", num=11).start()
    # Main(name="招商银行", num=12).start()
    # Main(name="兴业银行", num=13).start()
    # Main(name="上海浦东发展银行", num=14).start()
    # Current run: the entry at index 15 of Main.banks.
    scraper = Main(name="渤海银行", num=15)
    scraper.start()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment