# Scraper for 千图网 (58pic.com): downloads the image or video found on a picture page.
import re
import urllib.request
# Page to scrape.
url_baba = "https://www.58pic.com/newpic/32681878.html"
# Captures the text between `content="//preview.` and `!w1024_water`;
# prefixed with "http://pic." it becomes the image download URL.
tj = 'content="//preview.(.*?)!w1024_water'
# Captures the path that follows the pic.qiantucdn.com/58pic/ host in a
# `data-src` attribute; a match is treated as "this page has a video".
# NOTE(review): presumably only video elements carry this attribute - verify.
mp4_tj = 'data-src="//pic.qiantucdn.com/58pic/(.*?)"'
# Captures the title shown on the page; it is used as the saved file's name.
title_tj = '<span class="pic-title fl">(.*?)</span>'
# Directory the downloaded files are written to.
save_dir = "F:/bing/千图网无损/"


def _extension(path):
    """Return the extension of *path* including the dot, or "" if it has none.

    Only the last path segment is inspected, so a dot in a directory name is
    not mistaken for an extension.  Unlike slicing the last four characters,
    this also handles extensions such as ".jpeg" or ".webp".
    """
    last_segment = path.rsplit("/", 1)[-1]
    _, dot, ext = last_segment.rpartition(".")
    return dot + ext if dot else ""


def _safe_name(title):
    """Replace characters that are invalid in Windows file names with "_".

    The title comes from scraped HTML, so it must not be able to inject path
    separators or other reserved characters into the output path.
    """
    return re.sub(r'[\\/:*?"<>|]', "_", title)


def page_title(html):
    """Return the first title matched by ``title_tj`` in *html*.

    Falls back to "untitled" when the page has no (or an empty) title so that
    a missing title does not abort the download.
    """
    titles = re.findall(title_tj, html)
    return titles[0] if titles and titles[0] else "untitled"


def plan_downloads(html, directory=save_dir):
    """Work out which files should be downloaded for a page.

    Args:
        html: Decoded HTML text of the page.
        directory: Prefix prepended to every file name (expected to end with
            a path separator).

    Returns:
        A list of ``(url, file_path)`` tuples.  If the page contains a video
        match, the list holds only that video; otherwise it holds one entry
        per image match.  The list is empty when nothing matched.
    """
    name = _safe_name(page_title(html))

    videos = re.findall(mp4_tj, html)
    if videos:
        # A video takes precedence over images and is fetched exactly once,
        # regardless of how many image matches the page has.
        video_path = videos[0]
        return [(
            "http://pic.qiantucdn.com/58pic/" + video_path,
            directory + name + _extension(video_path),
        )]

    plan = []
    for number, image_path in enumerate(re.findall(tj, html), start=1):
        # The first image keeps the plain title as its name; later ones get a
        # numeric suffix so they do not overwrite each other.
        suffix = "" if number == 1 else "_" + str(number)
        plan.append((
            "http://pic." + image_path,
            directory + name + suffix + _extension(image_path),
        ))
    return plan


def main():
    """Fetch ``url_baba`` and download its video or images into ``save_dir``."""
    import os  # local import: only needed to create the output directory

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url_baba) as response:
        # Undecodable bytes are dropped rather than raising.
        # NOTE(review): assumes the page is served as GBK - confirm.
        html = response.read().decode('gbk', 'ignore')

    downloads = plan_downloads(html)
    if not downloads:
        print("No downloadable image or video found on " + url_baba)
        return

    if re.search(mp4_tj, html):
        print("发现视频,开始下载")

    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)
    title = page_title(html)
    for url, file in downloads:
        urllib.request.urlretrieve(url, filename=file)
        print("下载成功:" + str(title))


if __name__ == "__main__":
    main()