Python多線程結合隊列下載百度音樂代碼
[Python]代碼
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''Batch-download a singer's songs from Baidu music (first result page only).

Python 2 script: scrapes the Baidu music search page with BeautifulSoup,
resolves each song's real MP3 URL through the "play box" XML endpoint, then
downloads the files concurrently with a pool of daemon threads fed by a Queue.

@author: admin
@qq: 1243385033
'''
import os
import re
import sys
import threading
import urllib2

from bs4 import BeautifulSoup
from Queue import Queue

# Target singer (unicode; quoted into the search / play-box URLs).
SINGER = u'亞東'
# Root folder the MP3 files are saved under (a SINGER subfolder is created).
SAVE_FOLDER = 'F:/music/'
# Search URL template; %s receives the url-quoted singer name.
search_url = "http://music.baidu.com/search/song?key=%s&s=1"
# Baidu music play-box endpoint; title/singer parameters are appended per song.
song_url = "http://box.zhangmen.baidu.com/x?op=12&count=1&mtype=1&title="
# Cap on concurrent download threads (the original spawned one per song).
MAX_WORKERS = 8


class Downloader(threading.Thread):
    '''Worker thread: pulls {song_name: url} dicts off a queue and saves each.'''

    def __init__(self, task):
        threading.Thread.__init__(self)
        self.task = task  # shared Queue of single-entry {song_name: url} dicts

    def run(self):
        '''Consume tasks forever; the thread is a daemon and dies with main.'''
        while True:
            url = self.task.get()
            try:
                self.download(url)
            finally:
                # Always mark the task done, even on failure — otherwise one
                # bad download leaves Queue.join() blocked forever.
                self.task.task_done()

    def build_path(self, filename):
        '''Return the full save path SAVE_FOLDER/SINGER/<filename>.mp3.'''
        return os.path.join(SAVE_FOLDER, SINGER, filename + '.mp3')

    def download(self, url):
        '''Download one song; *url* is a single-entry {song_name: request_url} dict.'''
        (f_name, req_url), = url.items()
        handle = urllib2.urlopen(req_url)
        try:
            save_path = self.build_path(f_name)
            with open(save_path, "wb") as out:
                # Stream in 1 KiB chunks so large files are never fully in memory.
                while True:
                    chunk = handle.read(1024)
                    if not chunk:
                        break
                    out.write(chunk)
        finally:
            handle.close()  # the original leaked this socket
        msg = u"已經從 %s下載完成" % req_url
        sys.stdout.write(msg)
        sys.stdout.flush()


class HttpRequest:
    '''Queries Baidu for the singer's songs and resolves real download URLs.'''

    def __init__(self):
        self.task = []
        # The play-box XML wraps the two URL halves in CDATA sections.
        self.reg_decode = re.compile(r'<decode>.*?CDATA\[(.*?)\]].*?</decode>')
        self.reg_encode = re.compile(r'<encode>.*?CDATA\[(.*?)\]].*?</encode>')
        self.init()
        self.target_url = search_url % urllib2.quote(self.encode2utf8(SINGER))

    def encode2utf8(self, source):
        '''Return *source* as UTF-8 bytes; byte strings pass through unchanged.'''
        # The original also re-encoded py2 str values, which implicitly
        # decodes them as ASCII first and raises on non-ASCII bytes.
        if isinstance(source, unicode):
            return source.encode("utf8")
        return source

    def mkDir(self, dir_name):
        '''Create *dir_name* (including missing parents) if it does not exist.'''
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)

    def init(self):
        '''Ensure the SAVE_FOLDER/SINGER target directory exists.'''
        self.mkDir(os.path.join(SAVE_FOLDER, SINGER))

    def http_request(self):
        '''Scrape the search page; return a list of {song_name: mp3_url} dicts.'''
        response = urllib2.urlopen(self.target_url)
        try:
            content = response.read()
        finally:
            response.close()
        html = BeautifulSoup(content, "html.parser", from_encoding="utf8")
        song_list = html.find_all('div', {"monkey": "song-list"})[0]
        for span_tag in song_list.find_all('span', class_='song-title'):
            song_name = unicode(span_tag.find_all("a")[0].get_text())
            # BUG FIX: the original mutated the module-level song_url in the
            # loop, so each request URL accumulated all previous song titles.
            req_url = (song_url
                       + urllib2.quote(self.encode2utf8(song_name))
                       + '$$' + urllib2.quote(self.encode2utf8(SINGER))
                       + '$$$$&url=&listenreelect=0&.r=0.1696378872729838')
            xmlfile = urllib2.urlopen(req_url)
            try:
                xml_content = xmlfile.read()
            finally:
                xmlfile.close()
            url1 = self.reg_encode.findall(xml_content)
            url2 = self.reg_decode.findall(xml_content)
            if not url1 or not url2:
                continue
            # <encode> carries the directory part, <decode> the real file name.
            url = url1[0][:url1[0].rindex('/') + 1] + url2[0]
            self.task.append({song_name: url})
        return self.task


def start_download(urls):
    '''Download every entry of *urls* concurrently; blocks until all finish.'''
    queue = Queue()
    # Daemon workers: they never exit their loop, so the process must not
    # wait on them — queue.join() below is the completion barrier.
    for _ in xrange(min(len(urls), MAX_WORKERS)):
        worker = Downloader(queue)
        worker.setDaemon(True)
        worker.start()
    for url in urls:
        queue.put(url)
    queue.join()


if __name__ == '__main__':
    http = HttpRequest()
    start_download(http.http_request())
本文由用戶 LasonyaHart 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!