爬虫xpath路径是什么(利用xpath爬取lianjia租房信息)

import requests

from lxml import etree

import time

import pymysql

class MyMysql(object):
    """Thin wrapper that owns one PyMySQL connection and one cursor.

    The connection parameters are hard-coded in ``__init__``. Statements are
    run through :meth:`excute_sql`, which commits after every successful
    execution. The cursor and connection are closed when the object is
    finalized.
    """

    def __init__(self):
        """Open the connection and create the cursor used for all statements."""
        # Pre-set both handles so __del__ stays safe if connect() raises.
        self.db = None
        self.cursor = None
        # PyMySQL 1.0+ accepts connection parameters as keywords only;
        # passing them positionally raises TypeError.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='******',
            database='wang',
        )
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data):
        """Execute one parameterized statement and commit it.

        Args:
            sql: SQL text containing ``%s`` placeholders.
            data: Sequence of values bound to the placeholders by the driver.

        Raises:
            Exception: Whatever the cursor or connection raises; the
                transaction is rolled back before the error propagates.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-finished transaction open on the connection.
            self.db.rollback()
            raise

    def __del__(self):
        """Close the cursor and the connection if they were ever opened."""
        # getattr guards against instances whose __init__ never ran or failed.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            self.cursor = None
            cursor.close()
        db = getattr(self, 'db', None)
        if db is not None:
            self.db = None
            db.close()

# Parameterized INSERT statement: the seven %s placeholders are bound by the
# driver from the data tuple, so scraped text is never spliced into the SQL.
sql = 'insert into lianjia_jinan(title,region,zone,meters,price,date,url) values(%s,%s,%s,%s,%s,%s,%s)'

msq = MyMysql()

# The request headers are identical for every page, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}


def _first_text(node, path):
    """Return ``.text`` of the first element matching *path*, or None if nothing matches."""
    found = node.xpath(path)
    return found[0].text if found else None


# Walk result pages 1..3 and store every listing found on each page.
for i in range(1, 4):
    page_url = 'https://jn.lianjia.com/zufang/pg%srco10/' % i

    # A timeout keeps one stalled connection from hanging the crawl forever.
    response = requests.get(page_url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing the error page.
    response.raise_for_status()

    html_ele = etree.HTML(response.text)

    # Select every <li> under the listing <ul>.
    li_list = html_ele.xpath('//ul[@id="house-lst"]/li')

    for res in li_list:
        hrefs = res.xpath('./div[2]/h2/a/@href')
        if not hrefs:
            # Entry without a title link does not match the expected layout;
            # skip it rather than abort the whole run with an IndexError.
            continue
        url = hrefs[0]

        # Missing optional fields become None and are stored as NULL.
        title = _first_text(res, './div[2]/h2/a')
        region = _first_text(res, './div[2]/div[1]/div[1]/a/span')
        zone = _first_text(res, './div[2]/div[1]/div[1]/span[1]/span')
        meters = _first_text(res, './div[2]/div[1]/div[1]/span[2]')
        price = _first_text(res, './div[2]/div[2]/div[1]/span')
        date = _first_text(res, './div[2]/div[2]/div[2]')

        data = (title, region, zone, meters, price, date, url)
        msq.excute_sql(sql, data)

        # Pause one second after each stored record.
        time.sleep(1)

    print('第{}页保存完毕'.format(i))

爬虫xpath路径是什么(利用xpath爬取lianjia租房信息)(1)

免责声明:本文仅代表文章作者的个人观点,与本站无关。其原创性、真实性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容文字的真实性、完整性和原创性本站不作任何保证或承诺,请读者仅作参考,并自行核实相关内容。文章投诉邮箱:anhduc.ph@yahoo.com

    分享
    投诉
    首页