利用htmlunit下載網頁上的文件

碼頭工人 8年前發布 | 4K 次閱讀 Java

import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
public class DownloadFile {
    /**
     * Entry point: loads the page at {@code baseUrl} through an HtmlUnit {@code WebClient},
     * reads the response body as text and prepares three regex matchers over it.
     *
     * NOTE(review): this listing is truncated. The method breaks off inside the
     * {@code while} loop (the last visible statement has no terminator), so the download
     * logic itself is not shown here.
     *
     * @param args command-line arguments; not referenced in the visible part of the method
     * @throws Exception propagated from the page fetch or anything in the missing remainder
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): "<a href=" looks like the residue of an HTML anchor that replaced the
        // real URL during extraction; restore the intended address before running.
        String baseUrl = "<a href=";
        // NOTE(review): the three patterns below appear damaged by extraction: `\s` is written
        // with a single backslash inside a Java string literal, and only eSoundRegex keeps a
        // `*` quantifier on its capture group while the other two capture a single character.
        // Presumably the originals used `\\s` and `*` quantifiers; verify against the source.
        String bihuaRegex = "class=\"guanggao\"[^<]<[^<]<param\sname=\"movie\"\svalue=\"([^\"])";
        String aSoundRegex = "class=\"js12\">ā.?name=\"FlashVars\"\svalue=\"f=([^\"])";
        String eSoundRegex = "class=\"js12\">ē.?name=\"FlashVars\"\svalue=\"f=([^\"]*)";
        // Client options: CSS and JavaScript are switched off, and the two
        // "throw exception on ..." options are disabled.
        WebClient client = new WebClient();
        client.getOptions().setCssEnabled(false);
        client.getOptions().setJavaScriptEnabled(false);
        client.getOptions().setThrowExceptionOnFailingStatusCode(false);
        client.getOptions().setThrowExceptionOnScriptError(false);
        // Fetch the page and take its response content as a single string.
        Page page = client.getPage(baseUrl);
        String source = page.getWebResponse().getContentAsString();
        // One matcher per pattern, all scanning the same page text.
        Matcher mBihuan = Regex(source, bihuaRegex);
        Matcher mA = Regex(source, aSoundRegex);
        Matcher mE = Regex(source, eSoundRegex);
        // Iterate over every match of bihuaRegex; the loop body is cut off in this listing.
        while(mBihuan.find()) {
            String url = "<a href="

/**
 * Builds a matcher for {@code regex} over {@code source}.
 *
 * <p>The pattern is compiled with {@link Pattern#DOTALL}, so {@code .} also matches
 * line terminators and a single match may span several lines of the input.
 *
 * @param source the text to scan
 * @param regex  the regular expression to compile
 * @return a fresh, not-yet-advanced matcher bound to {@code source}
 */
public static Matcher Regex(String source, String regex) {
    return Pattern.compile(regex, Pattern.DOTALL).matcher(source);
}

/**
 * Writes the response body of a fetched page to a local file.
 *
 * <p>Both the response stream and the file stream are opened in a try-with-resources
 * statement, so they are closed even when opening the file or copying fails.
 *
 * @param page the fetched page whose response content is saved
 * @param file path of the file to write
 * @throws Exception if the response stream cannot be obtained, the file cannot be opened,
 *                   or copying the content fails
 */
public static void saveFile(Page page, String file) throws Exception {
    try (InputStream is = page.getWebResponse().getContentAsStream();
         FileOutputStream output = new FileOutputStream(file)) {
        IOUtils.copy(is, output);
    }
}

}

本文由用戶 admin 自行上傳分享，僅供網友學習交流。所有權歸原作者，若您的權利被侵害，請聯繫管理員。

轉載本站原創文章，請註明出處，並保留原始鏈接、圖片水印。

本站是一個以用戶分享為主的開源技術平臺，歡迎各類分享！

本文地址：http://www.baiduhome.net/code/view/1454595280651

Java

利用htmlunit下載網頁上的文件

相關代碼

相關文檔

相關經驗

目錄