今天起我将开始介绍一些网络爬虫的教程~不足之处欢迎大家指出~
本程序将实现自动爬取百度图片相关内容~通过 re 库提取图片地址，并用 requests 下载保存图片
# Batch-fetch image URLs from a Baidu image search result page.
import requests
import re

want = '兰博基尼'  # search keyword (Lamborghini)

# Base search URL without the keyword; the keyword is supplied through
# `params` so requests percent-encodes the non-ASCII query correctly
# instead of relying on raw string concatenation.
base_url = ('http://image.baidu.com/search/flip'
            '?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result'
            '&fr=&sf=1&fmq=1497491098685_R&pv=&ic=0&nc=1&z=&se=1&showtab=0'
            '&fb=0&width=&height=&face=0&istype=2&ie=utf-8'
            '&ctd=1497491098685%5E00_1519X735')


def extract_obj_urls(html):
    """Return every "objURL" value found in the result-page HTML."""
    return re.findall(r'"objURL":"(.*?)",', html)


# Fetch the result page. `timeout` prevents the script from hanging
# forever; `raise_for_status` fails fast instead of silently regex-parsing
# an HTTP error page.
photo = requests.get(base_url, params={'word': want}, timeout=10)
photo.raise_for_status()
photo_url = extract_obj_urls(photo.text)
print(photo_url)
for i in photo_url:
    print(i)
# Download a single image and save it to disk.
import requests

url = "http://b-ssl.duitang.com/uploads/item/201607/21/20160721160933_kYrza.thumb.700_0.jpeg"

# Fetch the image bytes. `timeout` avoids hanging forever;
# `raise_for_status` avoids writing an HTML error page to disk as if it
# were image data.
photo = requests.get(url, timeout=10)
photo.raise_for_status()

# The source is a JPEG, so save it with a matching extension (the
# original wrote JPEG bytes to "a.png", a mismatched extension).
# `with` guarantees the file handle is closed even if the write fails.
with open("a.jpeg", "wb") as f:  # binary mode: photo.content is bytes
    f.write(photo.content)
更多精彩内容