#!/usr/bin/env python
# -*- encoding:utf-8 -*-
# Python抓取花瓣網圖片腳本 (Python script for scraping images from Huaban)
# author: insun
# http://yxmhero1989.blog.163.com/blog/static/112157956201311994027168/
import urllib, urllib2, re, sys, os reload(sys)
url = '
# Create the 'beauty' directory in the current working directory if nothing
# exists at that path yet, so it is present before get_huaban_beauty() runs.
if not os.path.exists('beauty'):
    os.mkdir('beauty')
def get_huaban_beauty(): pin_id = 48145457 limit = 20 #他默認允許的limit為100 while pin_id != None: url = ' req = urllib2.Request(url, headers=i_headers) html = urllib2.urlopen(req).read() reg = re.compile('"pin_id":(.?),.+?"file":{"farm":"farm1", "bucket":"hbimg",.+?"key":"(.?)",.+?"type":"image/(.*?)"', re.S) groups = re.findall(reg, html) print str(pin_id) + "Start to catch " + str(len(groups)) + " photos" for att in groups: pin_id = att[0] att_url = att[1] + '_fw554' img_type = att[2] img_url = '
print pin_id
except: print 'error occurs'
get_huaban_beauty()