900字范文,内容丰富有趣,生活中的好帮手!
900字范文 > python爬虫——爬取链家网北京租房信息

python爬虫——爬取链家网北京租房信息

时间:2021-03-30 07:36:25

相关推荐

python爬虫——爬取链家网北京租房信息

爬取链家网站北京租房信息

# Scrape Lianjia rental listings for Beijing (Chaoyang district) and store
# them in a local MySQL database.
import re

import pymysql
import requests

# NOTE(review): the scheme/host was missing from the URL literals in the
# original text (they started at '/zufang/...'), which makes requests raise
# MissingSchema. The Beijing Lianjia host is assumed here -- confirm.
BASE_URL = 'https://bj.lianjia.com'

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# PyMySQL 1.0+ accepts keyword arguments only, so name every argument.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file instead.
db = pymysql.connect(host='localhost', user='root', password='126315',
                     database='petzhang')
cursor = db.cursor()

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/81.0.4044.9 Safari/537.36'
}

# --- List-page patterns ---------------------------------------------------
# District link chunk (e.g. Chaoyang, Haidian); only its Chinese characters
# are kept afterwards.
_DISTRICT_RE = re.compile(
    r'<a target="_blank" href="/zufang/(.*?)/a>-<a href="/zufang/')
_CHINESE_RE = re.compile(r'[\u4e00-\u9fa5]+')
# Second location level; stored in the `商圈` column (the original author
# guessed it was the sub-district office).
_AREA_RE = re.compile(r'target="_blank">(.*?)</a>-<a title=')
# Third location level; the original author guessed residential community.
_COMMUNITY_RE = re.compile(r'</a>-<a title="(.*?)" href=')
# Identifier suffix of each listing's detail page.
_DETAIL_ID_RE = re.compile(r'<a target="_blank" href="/zufang/BJ(.*?)">')

# --- Detail-page patterns ---------------------------------------------------
# NOTE(review): the label colon is matched as either ASCII ':' or full-width
# '：' because the original text may have had its punctuation normalized;
# verify against the live page markup.
_TITLE_RE = re.compile(r'<p class="content__title">(.*?)</p>')
_ONELINE_LABELS = ('面积', '朝向', '入住', '楼层', '电梯',
                   '用水', '用电', '燃气', '采暖', '租期')
# Order matters: it must match the columns that follow `小区` in _COLUMNS.
_DETAIL_FIELD_RES = tuple(
    re.compile(pattern) for pattern in (
        [
            r'<li><span class="label">租赁方式[:：]</span>(.*?)</li>',
            r'<span>(.*?)</span>元/月',
            r'<li><span class="label">房屋类型[:：]</span>(.*?)</li>',
        ]
        + [r'<li class="fl oneline">{}[:：](.*?)</li>'.format(label)
           for label in _ONELINE_LABELS]
        + [
            r'name":"(.*?)","office',
            r'phone400":"(.*?)","phone',
        ]
    )
)

_COLUMNS = ('房源标题', '网址', '市区', '商圈', '小区', '租赁方式', '价格',
            '房屋类型', '面积', '朝向', '入住', '楼层', '电梯', '用水',
            '用电', '燃气', '采暖', '租期', '代理人', '联系方式')
# Parameterized insert: the driver escapes every value, so quotes inside the
# scraped text can neither break nor inject into the statement.
_INSERT_SQL = 'insert into `chaoyang` ({}) values ({})'.format(
    ','.join('`{}`'.format(column) for column in _COLUMNS),
    ','.join(['%s'] * len(_COLUMNS)),
)


def _first(matches, default=''):
    """Return the first regex match, or *default* when nothing matched."""
    return matches[0] if matches else default


def _at(items, index, default=''):
    """Return items[index], or *default* when the list is too short."""
    return items[index] if index < len(items) else default


def _parse_detail(html):
    """Return (title, [field, ...]) extracted from a detail page's HTML.

    Every field is the first match of its pattern, or '' when absent.
    """
    title = _first(_TITLE_RE.findall(html))
    fields = [_first(pattern.findall(html)) for pattern in _DETAIL_FIELD_RES]
    return title, fields


def _save_rows(rows):
    """Insert each row into `chaoyang`, committing one row at a time.

    A failing row is rolled back and reported; the remaining rows are still
    attempted, so one bad listing does not discard the rest of the page.
    """
    for row in rows:
        try:
            cursor.execute(_INSERT_SQL, row)
            db.commit()
        except pymysql.MySQLError as e:
            db.rollback()
            print(e)
        else:
            print("已存储" + row[0])


def getdata(n):
    """Scrape list page *n* of the Chaoyang rentals and store its listings.

    Fetches the list page, follows every detail link found on it, extracts
    the listing fields with regular expressions and inserts one database
    row per listing.

    Args:
        n: 1-based page number of the list view.

    Raises:
        requests.RequestException: if a page cannot be fetched (including
            a timeout after REQUEST_TIMEOUT seconds).
    """
    list_url = '{}/zufang/chaoyang/pg{}'.format(BASE_URL, n)
    page = requests.get(list_url, headers=headers,
                        timeout=REQUEST_TIMEOUT).text

    districts = [''.join(_CHINESE_RE.findall(chunk))
                 for chunk in _DISTRICT_RE.findall(page)]
    areas = _AREA_RE.findall(page)
    communities = _COMMUNITY_RE.findall(page)

    rows = []
    for index, detail_id in enumerate(_DETAIL_ID_RE.findall(page)):
        detail_url = '{}/zufang/BJ{}'.format(BASE_URL, detail_id)
        detail_html = requests.get(detail_url, headers=headers,
                                   timeout=REQUEST_TIMEOUT).text
        title, fields = _parse_detail(detail_html)
        if not title:
            # No title means the page did not parse as a listing; do not
            # store an empty row for it.
            print('skipped (no title): ' + detail_url)
            continue
        # Location values are paired with detail links by position, as in
        # the original; a missing entry becomes '' instead of raising.
        rows.append((
            title,
            detail_url,
            _at(districts, index),
            _at(areas, index),
            _at(communities, index),
            *fields,
        ))

    print('page-{}'.format(n))
    _save_rows(rows)


if __name__ == '__main__':
    try:
        for n in range(1, 101):
            getdata(n)
    finally:
        # Release the database resources even if a page fails.
        cursor.close()
        db.close()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。