Merge pull request #414 from jsbd/master

新增构造函数,支持crawl.js路径自定义,因为当其他项目依赖此jar包时,runtime.exec()执行phantomjs命令时无法使用jar包中的crawl.js
master
Yihua Huang 2017-01-21 10:50:48 +08:00 committed by GitHub
commit f29a10472f
1 changed files with 48 additions and 10 deletions

View File

@ -29,6 +29,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
public PhantomJSDownloader() {
    // Only the crawl.js location is set up here; the phantomjs command is not touched.
    initPhantomjsCrawlPath();
}
/**
* Creates a downloader that launches phantomjs with a custom command.
*
@ -37,13 +38,50 @@ public class PhantomJSDownloader extends AbstractDownloader {
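* phantomjs --ignore-ssl-errors=yes : ignore SSL errors when the crawled URL is https
* /usr/local/bin/phantomjs : absolute path of the command, avoids an IOException when phantomjs is not on the PATH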
*
* @param phantomJsCommand phantomJsCommand
* @param phantomJsCommand
*/
public PhantomJSDownloader(String phantomJsCommand) {
    // The command is kept in a static field, so it applies to every downloader instance.
    PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
    // crawl.js location is derived from the classpath root.
    initPhantomjsCrawlPath();
}
/**
 * Creates a downloader with a caller-supplied phantomjs command and crawl.js location.
 *
 * <p>Intended for projects that consume this library as a jar dependency: the phantomjs
 * command started through {@code Runtime.exec()} cannot use the {@code crawl.js} packaged
 * inside the jar, so the script path is passed in explicitly instead of being derived
 * from the classpath root.
 *
 * <p>Both values are written to static fields and therefore apply to every instance.
 *
 * <p>Contents of a suitable crawl.js:
 * <pre>{@code
 * var system = require('system');
 * var url = system.args[1];
 *
 * var page = require('webpage').create();
 * page.settings.loadImages = false;
 * page.settings.resourceTimeout = 5000;
 *
 * page.open(url, function (status) {
 *     if (status != 'success') {
 *         console.log("HTTP request failed!");
 *     } else {
 *         console.log(page.content);
 *     }
 *
 *     page.close();
 *     phantom.exit();
 * });
 * }</pre>
 *
 * <p>Save a script like the one above to the location passed as {@code crawlJsPath}.
 *
 * <p>Example:
 * <pre>{@code
 * new PhantomJSDownloader("/your/path/phantomjs", "/your/path/crawl.js");
 * }</pre>
 *
 * @param phantomJsCommand command used to launch phantomjs
 * @param crawlJsPath      path of the crawl.js script handed to phantomjs
 */
public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) {
    PhantomJSDownloader.crawlJsPath = crawlJsPath;
    PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
}
/**
 * Sets the static {@code crawlJsPath} to {@code crawl.js} inside the classpath root directory.
 *
 * <p>The root is resolved with {@code getResource("/")}. That lookup returns {@code null}
 * when no root resource can be found (NOTE(review): typically when the application runs
 * only from jar files - confirm for your deployment); this case is reported with a
 * descriptive exception instead of a bare {@code NullPointerException}.
 *
 * @throws IllegalStateException if the classpath root cannot be resolved
 */
private void initPhantomjsCrawlPath() {
    java.net.URL classpathRoot = this.getClass().getResource("/");
    if (classpathRoot == null) {
        throw new IllegalStateException(
                "Cannot locate the classpath root to find crawl.js; "
                        + "use PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) "
                        + "to supply the script path explicitly");
    }
    String rootDirectory = new File(classpathRoot.getPath()).getPath();
    // The trailing space after "crawl.js" is kept on purpose: code that builds the
    // command line may append the URL directly after this value.
    PhantomJSDownloader.crawlJsPath = rootDirectory + System.getProperty("file.separator") + "crawl.js ";
}
@ -86,7 +124,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
try {
String url = request.getUrl();
Runtime runtime = Runtime.getRuntime();
Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url);
Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + " " + url);
InputStream is = process.getInputStream();
BufferedReader br = new BufferedReader(new InputStreamReader(is));
StringBuffer stringBuffer = new StringBuffer();