"""Download the images found on the gallery pages listed in a text file.

Each line of the list file is expected to contain a desktop gallery URL of
the form ``http://aimm.92game.net/xinggan/<id>.html``.  The URL is rewritten
to its mobile counterpart, the page is fetched with a mobile user agent, and
every image URL matched by ``re_img`` is saved as ``<n>.jpg`` in the output
directory, where ``n`` is a running counter starting at 1.
"""
import os
import re
import urllib.request

# Regular expressions are compiled once up front so the loops can reuse them.
# NOTE(review): the image pattern is empty in this source, so ``findall``
# yields only empty strings, which are not fetchable URLs.  The real pattern
# was presumably lost when the code was pasted -- TODO restore it.
re_img = re.compile(r'')
re_url = re.compile(r'http://aimm\.92game\.net/xinggan/(\d+)\.html')

headers = {
    # Spoof a mobile user agent so the site serves its mobile pages.
    'user-agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36',
}

URL_LIST_FILE = "456.txt"
OUTPUT_DIR = "G:\\meizhi-image"
MOBILE_PAGE_PREFIX = 'http://m.aimm.92game.net/n.php?id='


def to_mobile_url(line):
    """Return the mobile page URL for the desktop gallery URL in *line*.

    The mobile version of the page is used because it is easier to scrape.
    Returns ``None`` when *line* contains no recognisable gallery URL (for
    example a blank line), so callers can skip it instead of crashing.
    """
    match = re_url.search(line)
    if match is None:
        return None
    return MOBILE_PAGE_PREFIX + match.group(1)


def fetch(url, request_headers=None):
    """Return the raw response body of *url* as bytes.

    *request_headers* is an optional dict of extra HTTP headers.  The
    response is always closed, even if reading it fails.
    """
    request = urllib.request.Request(url, headers=request_headers or {})
    with urllib.request.urlopen(request) as response:
        return response.read()


def main(url_list_path=URL_LIST_FILE, output_dir=OUTPUT_DIR):
    """Save every image from the pages listed in *url_list_path*.

    The list file is resolved against the current working directory *before*
    the process changes into *output_dir*; images are then written there as
    ``1.jpg``, ``2.jpg``, ... in the order they are found.

    Raises whatever ``open``, ``os.chdir`` or ``urllib`` raise on I/O or
    network failure; lines without a gallery URL are skipped silently.
    """
    # Read the whole list first so the file handle is released immediately.
    with open(url_list_path) as url_list:
        page_urls = [
            page_url
            for page_url in map(to_mobile_url, url_list)
            if page_url is not None
        ]

    os.chdir(output_dir)

    pic_num = 0
    for page_url in page_urls:
        page_html = fetch(page_url, headers).decode("utf-8")
        for image_url in re_img.findall(page_html):
            pic_num += 1
            print("... ... 第" + str(pic_num) + "只妹纸正在被保存... ...")
            # Images are requested without the spoofed headers, as before.
            image_bytes = fetch(image_url)
            with open(str(pic_num) + '.jpg', 'wb') as image_file:
                image_file.write(image_bytes)


if __name__ == "__main__":
    main()
Python 3.4 第一只爬虫,主要用到 urllib.request 和正则表达式。代码比较渣渣。