爬虫xpath路径是什么(利用xpath爬取lianjia租房信息)
import requests
from lxml import etree
import time
import pymysql
class MyMysql(object):
    """Small wrapper around a single PyMySQL connection and cursor.

    The connection targets the ``wang`` database on ``127.0.0.1`` as
    ``root``. Statements are executed through :meth:`excute_sql`, which
    commits after every successful statement.
    """

    def __init__(self):
        """Open the connection and create a cursor.

        Raises whatever ``pymysql.connect`` raises when the server cannot
        be reached or the credentials are rejected.
        """
        # Pre-set both attributes so close()/__del__ stay safe even when
        # connect() below raises before they would otherwise exist.
        self.db = None
        self.cursor = None
        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
        # host/user/password/database positionally.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='******',
            database='wang',
            # The scraped fields contain Chinese text; be explicit rather
            # than relying on the driver/server default charset.
            charset='utf8mb4',
        )
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data):
        """Execute one parameterized statement and commit it.

        (The misspelled name is kept for backward compatibility.)

        Args:
            sql: SQL text using ``%s`` placeholders.
            data: Sequence of values bound to the placeholders.

        Raises:
            Any exception raised by the driver; the open transaction is
            rolled back before the exception propagates.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a second call becomes a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    def __del__(self):
        """Release database resources when the wrapper is collected."""
        self.close()
# Parameterized INSERT; the column order must match FIELD_XPATHS below.
sql = 'insert into lianjia_jinan(title,region,zone,meters,price,date,url) values(%s,%s,%s,%s,%s,%s,%s)'

# Listing-page URL template; %s is replaced by the page number.
PAGE_URL = 'https://jn.lianjia.com/zufang/pg%srco10/'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# XPath of every stored field, relative to one <li>, in SQL column order.
FIELD_XPATHS = (
    './div[2]/h2/a',                        # title
    './div[2]/div[1]/div[1]/a/span',        # region
    './div[2]/div[1]/div[1]/span[1]/span',  # zone
    './div[2]/div[1]/div[1]/span[2]',       # meters
    './div[2]/div[2]/div[1]/span',          # price
    './div[2]/div[2]/div[2]',               # date
    './div[2]/h2/a/@href',                  # url
)


def first_value(nodes):
    """Return the value of the first XPath match, or None if there is none.

    Attribute queries (``@href``) yield strings, which are returned as-is;
    element queries yield nodes, whose ``.text`` is returned.
    """
    if not nodes:
        return None
    head = nodes[0]
    return head if isinstance(head, str) else head.text


def parse_listing(li):
    """Extract one listing as a tuple in SQL column order.

    Args:
        li: An object exposing ``xpath(path)`` (an lxml ``<li>`` element).

    Returns:
        ``(title, region, zone, meters, price, date, url)`` where a field
        whose node is absent is ``None``, or ``None`` when no field at all
        could be found (the ``<li>`` is not a listing).
    """
    row = tuple(first_value(li.xpath(path)) for path in FIELD_XPATHS)
    if all(value is None for value in row):
        return None
    return row


def scrape_page(page, store):
    """Download one result page and insert each listing through ``store``.

    Args:
        page: 1-based page number.
        store: Object with an ``excute_sql(sql, data)`` method.

    Returns:
        Number of rows inserted.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(PAGE_URL % page, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    html_ele = etree.HTML(response.text)
    if html_ele is None:
        return 0
    saved = 0
    # Every <li> under the result list is one listing.
    for li in html_ele.xpath('//ul[@id="house-lst"]/li'):
        row = parse_listing(li)
        if row is None:
            continue
        store.excute_sql(sql, row)
        saved += 1
    return saved


def main(first_page=1, last_page=3, delay=1):
    """Scrape pages ``first_page``..``last_page`` (inclusive) into MySQL.

    Args:
        first_page: First page number to fetch.
        last_page: Last page number to fetch.
        delay: Seconds to pause after each page to throttle requests.
    """
    msq = MyMysql()
    for i in range(first_page, last_page + 1):
        scrape_page(i, msq)
        time.sleep(delay)
        print('第{}页保存完毕'.format(i))


if __name__ == '__main__':
    main()
免责声明：本文仅代表文章作者的个人观点，与本站无关。其原创性、真实性以及文中陈述文字和内容未经本站证实，对本文以及其中全部或者部分内容文字的真实性、完整性和原创性本站不作任何保证或承诺，请读者仅作参考，并自行核实相关内容。文章投诉邮箱：anhduc.ph@yahoo.com