利用 HtmlUnit 下載網頁上的文件
import java.io.FileOutputStream; import java.io.InputStream; import java.util.regex.Matcher; import java.util.regex.Pattern;import org.apache.commons.io.IOUtils;
import com.gargoylesoftware.htmlunit.Page; import com.gargoylesoftware.htmlunit.WebClient;
public class DownloadFile { public static void main(String[] args) throws Exception { String baseUrl = "<a href="; String bihuaRegex = "class=\"guanggao\"[^<]<[^<]<param\sname=\"movie\"\svalue=\"([^\"])"; String aSoundRegex = "class=\"js12\">ā.?name=\"FlashVars\"\svalue=\"f=([^\"])"; String eSoundRegex = "class=\"js12\">ē.?name=\"FlashVars\"\svalue=\"f=([^\"]*)"; WebClient client = new WebClient(); client.getOptions().setCssEnabled(false); client.getOptions().setJavaScriptEnabled(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setThrowExceptionOnScriptError(false); Page page = client.getPage(baseUrl); String source = page.getWebResponse().getContentAsString(); Matcher mBihuan = Regex(source, bihuaRegex); Matcher mA = Regex(source, aSoundRegex); Matcher mE = Regex(source, eSoundRegex); while(mBihuan.find()) { String url = "<a href="
/**
 * Compiles {@code regex} with {@link Pattern#DOTALL} (so {@code .} also matches line
 * terminators) and returns a matcher over {@code source}.
 *
 * @param source the text to search
 * @param regex  the regular expression to compile
 * @return a new matcher for {@code regex} applied to {@code source}
 */
public static Matcher Regex(String source, String regex) {
    Pattern p = Pattern.compile(regex, Pattern.DOTALL);
    return p.matcher(source);
}

/**
 * Copies the response body of {@code page} into the file named by {@code file}.
 *
 * @param page the fetched page whose web response content is written out
 * @param file path of the destination file, opened with {@link FileOutputStream}
 * @throws Exception if reading the response or writing the file fails
 */
public static void saveFile(Page page, String file) throws Exception {
    // try-with-resources closes both streams even when the copy throws; the previous
    // version never closed the response stream and leaked the file handle on failure.
    try (InputStream is = page.getWebResponse().getContentAsStream();
         FileOutputStream output = new FileOutputStream(file)) {
        IOUtils.copy(is, output);
    }
}
}