Golang網頁下載示例

fefet 9年前發布 | 6K 次閱讀 Golang

package main

/*
 * 中文編碼問題 (Chinese character-encoding issue)
 */

import (
	"errors"
	"flag"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"

	query "github.com/PuerkitoBio/goquery"
	"golang.org/x/text/encoding/simplifiedchinese"
)

var (
	// np is the CPU count reported by runtime.NumCPU.
	np = runtime.NumCPU()
	// The blank assignment exists only for its side effect: it calls
	// runtime.GOMAXPROCS(np) during package initialization.
	_ = runtime.GOMAXPROCS(np)
)

// wg counts the folder-crawling goroutines: crawl calls Add before starting
// one, crawlFolder calls Done, and main blocks on Wait.
var wg sync.WaitGroup

// Folder pairs a remote directory listing with the local directory that
// mirrors it.
type Folder struct {
	Url string // address of the remote listing to crawl
	Dir string // local directory the listing's contents are saved under
}

// File describes one remote file and where it is stored locally.
type File struct {
	Url  string // address the file is fetched from
	Dir  string // local directory the file is written into
	Name string // file name inside Dir
}

// checkErr prints err and terminates the process with exit status 1 when err
// is non-nil. It does nothing for a nil error.
func checkErr(err error) {
	if err == nil {
		return
	}
	fmt.Printf("%v\n", err)
	os.Exit(1)
}

func decodeToGBK(text string) (string, error) { dst := make([]byte, len(text)*2) tr := simplifiedchinese.GB18030.NewDecoder() nDst, _, err := tr.Transform(dst, []byte(text), true) if err != nil { return text, err } return string(dst[:nDst]), nil }

// printEach writes the text content of item to standard output, prefixed
// with "Selection: ". The index argument is accepted but not used.
func printEach(index int, item *query.Selection) {
	text := item.Text()
	fmt.Println("Selection: ", text)
}

// isDir reports whether path ends with a forward slash. crawl applies it to
// link text to decide between recursing and downloading.
func isDir(path string) bool {
	const slash = "/"
	endsWithSlash := strings.HasSuffix(path, slash)
	return endsWithSlash
}

func makeFolder(item query.Selection, url, dir string) (f Folder, err error) { tx := item.Text() href, ok := item.Attr("href") name, err := decodeToGBK(tx) if err != nil { return } if !ok { err = errors.New("makeFolder : " + tx + " href屬性不存在") return } f = &Folder{Url: url + href, Dir: filepath.Join(dir, name)} return }

func makeFile(item query.Selection, url, dir string) (f File, err error) { tx := item.Text() href, ok := item.Attr("href") if !ok { err = errors.New("makeFile : " + tx + " href屬性不存在") return } name, err := decodeToGBK(tx) if err != nil { return } f = &File{Url: url + href, Dir: dir, Name: name} return }

// crawl fetches the HTML page at url and walks every anchor element on it.
//
// Anchors whose text ends in "/" are treated as sub-directories and crawled
// concurrently, one goroutine per folder, registered on wg. All other anchors
// are treated as files and downloaded synchronously into localDir.
//
// Errors are printed and the affected page or link is skipped; the process
// is never terminated from here.
func crawl(url, localDir string) {
	doc, err := query.NewDocument(url)
	if err != nil {
		// Report and abandon this page rather than exiting, so that sibling
		// folders being crawled concurrently can continue.
		fmt.Printf("%v\n", err.Error())
		return
	}

	// Hrefs are appended directly to url, so it must end with a slash.
	if !strings.HasSuffix(url, "/") {
		url += "/"
	}

	doc.Find("a").Each(func(i int, item *query.Selection) {
		tx := item.Text()

		// Self and parent links would lead the crawl out of the starting
		// directory and back in again, recursing without bound.
		if tx == "./" || tx == "../" {
			return
		}

		if !isDir(tx) {
			file, err := makeFile(item, url, localDir)
			if err != nil {
				fmt.Printf("%v\n", err.Error())
				return
			}
			download(file)
			return
		}

		folder, err := makeFolder(item, url, localDir)
		if err != nil {
			fmt.Printf("%v\n", err.Error())
			return
		}
		// Add before starting the goroutine so wg.Wait cannot miss it.
		wg.Add(1)
		go crawlFolder(folder)
	})
}

func download(file *File) { dir := file.Dir url := file.Url name := file.Name

if err := os.MkdirAll(dir, os.ModePerm); os.IsExist(err) {
    fmt.Printf("%x is exist\n", dir)
} else {
    os.Chmod(dir, os.ModePerm)
}
resp, err := http.Get(url)
if err != nil {
    fmt.Printf("%v\n", err.Error())
    return
}
defer resp.Body.Close()

body, err := ioutil.ReadAll(resp.Body)
if err != nil {
    fmt.Printf("%v\n", err.Error())
    return
}

fp := string([]rune(filepath.Join(dir, name)))

err = ioutil.WriteFile(fp, body, 0777)
if err != nil {
    fmt.Printf("%v fp:[%v]\n", err.Error(), fp)
    return
}
fmt.Printf("Download: %+v\n", file)

}

func crawlFolder(folder *Folder) { url := folder.Url dir := folder.Dir

crawl(url, dir)
wg.Done()

}

func main() { host := flag.String("host", "http://localhost:8000&quot;, "HTTP服務地址Host") location := flag.String("locate", "E:/Crawler下載文件", "本地文件系統絕對路徑") flag.Parse() crawl(host, location) wg.Wait() }</pre>

 本文由用戶 fefet 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
 轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!