# Requests爬取百度图库图片(老司机第一弹)
# coding=utf-8
import os
import re

import requests
def geta(url, params=None, timeout=30):
    """Fetch *url* with an HTTP GET and report the outcome as a dict.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Optional;
            added so a stalled server cannot block the caller forever.

    Returns:
        A dict whose ``'success'`` entry is True only when a response with a
        non-error status was received; in that case ``'content'`` holds the
        raw response body. On any failure the dict contains only
        ``'success': False``.
    """
    ret = {'success': False}
    try:
        # The context manager closes the session even when the request fails.
        with requests.session() as session:
            msg = session.get(url, params=params or None, timeout=timeout)
            # A Response object is truthy only for non-error status codes.
            if msg:
                ret['success'] = True
                ret['content'] = msg.content
    except Exception as e:
        # Best-effort fetch: report the problem and signal failure through
        # the returned dict instead of propagating. repr() keeps the output
        # printable whatever text the exception carries.
        print(repr(e))
    return ret
def ceshi(name, page, save_dir=r'D:\meizitu'):
    """Download Baidu image-search results for *name* into *save_dir*.

    Args:
        name: Search keyword.
        page: Number of result pages to walk; one search request is made per
            page with offsets 30, 60, ..., page * 30.
        save_dir: Directory that receives the image files; it is created when
            missing. Defaults to the location that used to be hard-coded.

    Each image is written as ``a<offset><index>.jpg`` where ``index`` counts
    from 1 within its page. Images that fail to download are skipped.
    """
    # Bytes pattern so it matches the raw response body returned by geta().
    obj_url_re = re.compile(br'"objURL":"(.*?)"')
    # The original literal lacked a scheme and full host name, so every
    # search request was rejected before it was sent.
    search_url = 'https://image.baidu.com/search/index'

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # NOTE(review): offsets start at 30, so results 0-29 are never requested;
    # kept as in the original - confirm this is intended.
    for p in range(30, page * 30 + 1, 30):
        # The keyword goes through params so that it gets URL-encoded.
        params = {
            'tn': 'baiduimage',
            'ie': 'utf-8',
            'word': name,
            'pn': p,
            'step_word': name,
        }
        ret = geta(url=search_url, params=params)
        if not ret['success']:
            continue

        urls = obj_url_re.findall(ret['content'])
        for i, image_url in enumerate(urls, 1):
            print('第%s张图片正在下載!!!' % i)
            ll = geta(image_url)
            if not ll['success']:
                continue
            target = os.path.join(save_dir, 'a%s%s.jpg' % (p, i))
            with open(target, 'wb') as f:
                f.write(ll['content'])
        print('已经下载%s张%s图!' % (p, name))
if __name__ == '__main__':
    # Script entry point: ask for the search keyword and the number of
    # result pages, then start the download.
    keyword = raw_input("请输入要下载的图片名称:")
    page_count = int(raw_input("请输入要下载的页数:"))
    ceshi(name=keyword, page=page_count)
#学习路径#