From d1f2e65e5d798936e535ceb809671133185c25d9 Mon Sep 17 00:00:00 2001 From: Jsbd Date: Thu, 8 Dec 2016 14:28:48 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E6=96=B0=E5=A2=9EPhantomJSDownloader?= =?UTF-8?q?=E6=9E=84=E9=80=A0=E5=87=BD=E6=95=B0=EF=BC=8C=E6=94=AF=E6=8C=81?= =?UTF-8?q?crawl.js=E8=B7=AF=E5=BE=84=E8=87=AA=E5=AE=9A=E4=B9=89=EF=BC=8C?= =?UTF-8?q?=E5=9B=A0=E4=B8=BA=E5=BD=93=E5=85=B6=E4=BB=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E6=AD=A4jar=E5=8C=85=E6=97=B6=EF=BC=8Cruntim?= =?UTF-8?q?e.exec()=E6=89=A7=E8=A1=8Cphantomjs=E5=91=BD=E4=BB=A4=E6=97=B6?= =?UTF-8?q?=E6=97=A0=E4=BD=BF=E7=94=A8=E6=B3=95jar=E5=8C=85=E4=B8=AD?= =?UTF-8?q?=E7=9A=84crawl.js?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../codecraft/webmagic/downloader/PhantomJSDownloader.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java index 2292788..bea44fd 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java @@ -44,6 +44,11 @@ public class PhantomJSDownloader extends AbstractDownloader { PhantomJSDownloader.phantomJsCommand = phantomJsCommand; } + public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) { + PhantomJSDownloader.phantomJsCommand = phantomJsCommand; + PhantomJSDownloader.crawlJsPath = crawlJsPath; + } + private void initPhantomjsCrawlPath() { PhantomJSDownloader.crawlJsPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js "; } @@ -86,7 +91,7 @@ public class PhantomJSDownloader extends AbstractDownloader { try { String url = request.getUrl(); Runtime runtime = Runtime.getRuntime(); - Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url); + Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + " " + url); InputStream is = process.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is)); StringBuffer stringBuffer = new StringBuffer(); From 1b886d48a2de1f621305a802faca5d94f72f7f86 Mon Sep 17 00:00:00 2001 From: Jsbd Date: Thu, 8 Dec 2016 14:29:42 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=E6=96=B0=E5=A2=9EPhantomJSDownloader?= =?UTF-8?q?=E6=9E=84=E9=80=A0=E5=87=BD=E6=95=B0=EF=BC=8C=E6=94=AF=E6=8C=81?= =?UTF-8?q?crawl.js=E8=B7=AF=E5=BE=84=E8=87=AA=E5=AE=9A=E4=B9=89=EF=BC=8C?= =?UTF-8?q?=E5=9B=A0=E4=B8=BA=E5=BD=93=E5=85=B6=E4=BB=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E6=AD=A4jar=E5=8C=85=E6=97=B6=EF=BC=8Cruntim?= =?UTF-8?q?e.exec()=E6=89=A7=E8=A1=8Cphantomjs=E5=91=BD=E4=BB=A4=E6=97=B6?= =?UTF-8?q?=E6=97=A0=E4=BD=BF=E7=94=A8=E6=B3=95jar=E5=8C=85=E4=B8=AD?= =?UTF-8?q?=E7=9A=84crawl.js?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../downloader/PhantomJSDownloader.java | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java index bea44fd..a955e73 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java @@ -29,21 +29,54 @@ public class PhantomJSDownloader extends AbstractDownloader { public PhantomJSDownloader() { this.initPhantomjsCrawlPath(); } + /** - * 添加新的构造函数,支持phantomjs自定义命令 - * - * example: - * phantomjs.exe 支持windows环境 - * phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误 - * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException - * - * @param phantomJsCommand - */ + * 添加新的构造函数,支持phantomjs自定义命令 + * + * example: + * phantomjs.exe 支持windows环境 + * phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误 + * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException + * + * @param phantomJsCommand + */ public PhantomJSDownloader(String phantomJsCommand) { this.initPhantomjsCrawlPath(); PhantomJSDownloader.phantomJsCommand = phantomJsCommand; } + /** + * 新增构造函数,支持crawl.js路径自定义,因为当其他项目依赖此jar包时,runtime.exec()执行phantomjs命令时无使用法jar包中的crawl.js + * + * crawl.js start -->> + * + * var system = require('system'); + * var url = system.args[1]; + * + * var page = require('webpage').create(); + * page.settings.loadImages = false; + * page.settings.resourceTimeout = 5000; + * + * page.open(url, function (status) { + * if (status != 'success') { + * console.log("HTTP request failed!"); + * } else { + * console.log(page.content); + * } + * + * page.close(); + * phantom.exit(); + * }); + * + * <<-- crawl.js end + * 具体项目时可以将以上js代码复制下来使用 + * + * example: + * new PhantomJSDownloader("/your/path/phantomjs", "/your/path/crawl.js"); + * + * @param phantomJsCommand + * @param crawlJsPath + */ public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) { PhantomJSDownloader.phantomJsCommand = phantomJsCommand; PhantomJSDownloader.crawlJsPath = crawlJsPath; From e4af05a6f24430ccbcc4ceb875482c73465d4e44 Mon Sep 17 00:00:00 2001 From: "Ckex.zha" Date: Wed, 18 Jan 2017 17:28:01 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E7=BB=95=E8=BF=87=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E8=AF=81=E4=B9=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../downloader/HttpClientGenerator.java | 55 +++++++++++++++++-- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java index 80a7e29..fdd740f 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java @@ -1,5 +1,15 @@ package us.codecraft.webmagic.downloader; +import java.io.IOException; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.util.Map; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpException; import org.apache.http.HttpRequest; @@ -14,16 +24,19 @@ import org.apache.http.config.SocketConfig; import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.impl.client.*; +import org.apache.http.impl.client.BasicCookieStore; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.impl.cookie.BasicClientCookie; import org.apache.http.protocol.HttpContext; + import us.codecraft.webmagic.Site; import us.codecraft.webmagic.proxy.Proxy; -import java.io.IOException; -import java.util.Map; - /** * @author code4crafter@gmail.com
* @since 0.4.0 @@ -33,14 +46,46 @@ public class HttpClientGenerator { private PoolingHttpClientConnectionManager connectionManager; public HttpClientGenerator() { + SSLConnectionSocketFactory sslConnectionSocketFactory = null; + try { + sslConnectionSocketFactory = new SSLConnectionSocketFactory(createIgnoreVerifySSL()); + } catch (Exception e) { + e.printStackTrace(); + } Registry reg = RegistryBuilder.create() .register("http", PlainConnectionSocketFactory.INSTANCE) - .register("https", SSLConnectionSocketFactory.getSocketFactory()) +// .register("https", SSLConnectionSocketFactory.getSocketFactory()) + .register("https", sslConnectionSocketFactory) .build(); connectionManager = new PoolingHttpClientConnectionManager(reg); connectionManager.setDefaultMaxPerRoute(100); } + private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException { + SSLContext sc = SSLContext.getInstance("SSLv3"); + + // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法 + X509TrustManager trustManager = new X509TrustManager() { + @Override + public void checkClientTrusted(java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public void checkServerTrusted(java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + }; + + sc.init(null, new TrustManager[] { trustManager }, null); + return sc; + } + public HttpClientGenerator setPoolSize(int poolSize) { connectionManager.setMaxTotal(poolSize); return this; From 0dc26c8ca0f2e82f5ba49e0c064bfca28a6dbc21 Mon Sep 17 00:00:00 2001 From: "Ckex.zha" Date: Fri, 20 Jan 2017 14:03:26 +0800 Subject: [PATCH 04/10] optimize code. --- .../downloader/HttpClientGenerator.java | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java index 5675cb7..f4e589f 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; import java.util.Map; import javax.net.ssl.SSLContext; @@ -11,6 +12,7 @@ import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.http.HttpException; import org.apache.http.HttpRequest; import org.apache.http.HttpRequestInterceptor; @@ -33,6 +35,8 @@ import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.impl.cookie.BasicClientCookie; import org.apache.http.protocol.HttpContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.proxy.Proxy; @@ -42,46 +46,51 @@ import us.codecraft.webmagic.proxy.Proxy; * @since 0.4.0 */ public class HttpClientGenerator { - + + private transient Logger logger = LoggerFactory.getLogger(getClass()); + private PoolingHttpClientConnectionManager connectionManager; public HttpClientGenerator() { - SSLConnectionSocketFactory sslConnectionSocketFactory = null; - try { - sslConnectionSocketFactory = new SSLConnectionSocketFactory(createIgnoreVerifySSL()); - } catch (Exception e) { - e.printStackTrace(); - } Registry reg = RegistryBuilder.create() .register("http", PlainConnectionSocketFactory.INSTANCE) -// .register("https", SSLConnectionSocketFactory.getSocketFactory()) - .register("https", sslConnectionSocketFactory) + .register("https", buildSSLConnectionSocketFactory()) .build(); connectionManager = new PoolingHttpClientConnectionManager(reg); connectionManager.setDefaultMaxPerRoute(100); } - private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException { - SSLContext sc = SSLContext.getInstance("SSLv3"); + private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() { + try { + return new SSLConnectionSocketFactory(createIgnoreVerifySSL()); // 优先绕过安全证书 + } catch (KeyManagementException e) { + logger.error(ExceptionUtils.getStackTrace(e)); + } catch (NoSuchAlgorithmException e) { + logger.error(ExceptionUtils.getStackTrace(e)); + } + return SSLConnectionSocketFactory.getSocketFactory(); + } + private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException { // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法 X509TrustManager trustManager = new X509TrustManager() { + @Override - public void checkClientTrusted(java.security.cert.X509Certificate[] paramArrayOfX509Certificate, - String paramString) throws CertificateException { + public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { } @Override - public void checkServerTrusted(java.security.cert.X509Certificate[] paramArrayOfX509Certificate, - String paramString) throws CertificateException { + public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { } @Override - public java.security.cert.X509Certificate[] getAcceptedIssuers() { + public X509Certificate[] getAcceptedIssuers() { return null; } + }; - + + SSLContext sc = SSLContext.getInstance("SSLv3"); sc.init(null, new TrustManager[] { trustManager }, null); return sc; } From 407fbb613080102e3658727955d8731e22de3722 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 11:05:54 +0800 Subject: [PATCH 05/10] refactor logger#445 --- .../downloader/HttpClientGenerator.java | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java index f4e589f..1a0b2bd 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java @@ -1,18 +1,6 @@ package us.codecraft.webmagic.downloader; -import java.io.IOException; -import java.security.KeyManagementException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.CertificateException; -import java.security.cert.X509Certificate; -import java.util.Map; - -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; - import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.http.HttpException; import org.apache.http.HttpRequest; import org.apache.http.HttpRequestInterceptor; @@ -26,21 +14,25 @@ import org.apache.http.config.SocketConfig; import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.impl.client.BasicCookieStore; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.client.*; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.impl.cookie.BasicClientCookie; import org.apache.http.protocol.HttpContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import us.codecraft.webmagic.Site; import us.codecraft.webmagic.proxy.Proxy; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import java.io.IOException; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; +import java.util.Map; + /** * @author code4crafter@gmail.com
* @since 0.4.0 @@ -64,10 +56,10 @@ public class HttpClientGenerator { try { return new SSLConnectionSocketFactory(createIgnoreVerifySSL()); // 优先绕过安全证书 } catch (KeyManagementException e) { - logger.error(ExceptionUtils.getStackTrace(e)); - } catch (NoSuchAlgorithmException e) { - logger.error(ExceptionUtils.getStackTrace(e)); - } + logger.error("ssl connection fail", e); + } catch (NoSuchAlgorithmException e) { + logger.error("ssl connection fail", e); + } return SSLConnectionSocketFactory.getSocketFactory(); } From d60615f50390d7fc51d50bf60214f79124965206 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 11:29:42 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BD=BF=E7=94=A8start?= =?UTF-8?q?Urls=E6=B2=A1=E6=9C=89=E8=AE=BE=E7=BD=AEdomain=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E4=BD=BF=E7=94=A8cookie=E7=A9=BA=E6=8C=87=E9=92=88?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98#438?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index bb7cb3f..b1afb66 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -292,7 +292,7 @@ public class Spider implements Runnable, Task { } if (startRequests != null) { for (Request request : startRequests) { - scheduler.push(request, this); + addRequest(request); } startRequests.clear(); } From f45e2f118b236f7a8e502f015658ce76af153835 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 11:38:36 +0800 Subject: [PATCH 07/10] for release --- pom.xml | 4 ++-- webmagic-core/pom.xml | 2 +- webmagic-extension/pom.xml | 2 +- .../webmagic/downloader/PhantomJSDownloader.java | 9 ++++++--- .../us/codecraft/webmagic/monitor/SpiderMonitor.java | 2 +- .../java/us/codecraft/webmagic/utils/DoubleKeyMap.java | 2 +- webmagic-samples/pom.xml | 2 +- webmagic-saxon/pom.xml | 2 +- webmagic-scripts/pom.xml | 2 +- webmagic-selenium/pom.xml | 2 +- 10 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 46b60e8..987b3c6 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 7 us.codecraft - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 pom @@ -38,7 +38,7 @@ scm:git:git@github.com:code4craft/webmagic.git scm:git:git@github.com:code4craft/webmagic.git git@github.com:code4craft/webmagic.git - webmagic-parent-0.6.0 + webmagic-parent-0.6.1 diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index d789539..8bda0f6 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml index 76ee2d9..85b869a 100644 --- a/webmagic-extension/pom.xml +++ b/webmagic-extension/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java index a955e73..fa271dd 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java @@ -39,6 +39,7 @@ public class PhantomJSDownloader extends AbstractDownloader { * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException * * @param phantomJsCommand + * @return this */ public PhantomJSDownloader(String phantomJsCommand) { this.initPhantomjsCrawlPath(); @@ -47,7 +48,7 @@ public class PhantomJSDownloader extends AbstractDownloader { /** * 新增构造函数,支持crawl.js路径自定义,因为当其他项目依赖此jar包时,runtime.exec()执行phantomjs命令时无使用法jar包中的crawl.js - * + *
      * crawl.js start -->>
      * 
      *   var system = require('system');
@@ -69,13 +70,15 @@ public class PhantomJSDownloader extends AbstractDownloader {
      *   });
      *   
      * <<-- crawl.js end
+     * 
* 具体项目时可以将以上js代码复制下来使用 * * example: * new PhantomJSDownloader("/your/path/phantomjs", "/your/path/crawl.js"); * - * @param phantomJsCommand - * @param crawlJsPath + * @param phantomJsCommand phantomJsCommand + * @param crawlJsPath crawlJsPath + * @return this */ public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) { PhantomJSDownloader.phantomJsCommand = phantomJsCommand; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java index a8aaecf..cfb4a82 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java @@ -45,7 +45,7 @@ public class SpiderMonitor { * * @param spiders spiders * @return this - * @throws JMException + * @throws JMException JMException */ public synchronized SpiderMonitor register(Spider... spiders) throws JMException { for (Spider spider : spiders) { diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java index 9e83b6d..7695c66 100755 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java @@ -102,7 +102,7 @@ public class DoubleKeyMap extends MultiKeyMapBase { /** * @param key1 key1 - * @return + * @return map */ public Map remove(K1 key1) { Map remove = map.remove(key1); diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml index 8ac4d5b..bc0595d 100644 --- a/webmagic-samples/pom.xml +++ b/webmagic-samples/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml index bfbe4e3..fbac822 100644 --- a/webmagic-saxon/pom.xml +++ b/webmagic-saxon/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml index 6095a57..0b334c8 100755 --- a/webmagic-scripts/pom.xml +++ b/webmagic-scripts/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index fdd5dfb..ccd07e3 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.1 4.0.0 From 3e633c6871ffc4e3b90aa08ee4879170cbdb4c6f Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 11:51:14 +0800 Subject: [PATCH 08/10] version --- pom.xml | 2 +- webmagic-core/pom.xml | 2 +- webmagic-extension/pom.xml | 2 +- .../codecraft/webmagic/downloader/PhantomJSDownloader.java | 6 ++---- webmagic-samples/pom.xml | 2 +- webmagic-saxon/pom.xml | 2 +- webmagic-scripts/pom.xml | 2 +- webmagic-selenium/pom.xml | 2 +- 8 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index 987b3c6..e5a40b8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 7 us.codecraft - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 pom diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index 8bda0f6..d789539 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml index 85b869a..76ee2d9 100644 --- a/webmagic-extension/pom.xml +++ b/webmagic-extension/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java index fa271dd..fd0cc47 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java @@ -39,7 +39,6 @@ public class PhantomJSDownloader extends AbstractDownloader { * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException * * @param phantomJsCommand - * @return this */ public PhantomJSDownloader(String phantomJsCommand) { this.initPhantomjsCrawlPath(); @@ -49,7 +48,7 @@ public class PhantomJSDownloader extends AbstractDownloader { /** * 新增构造函数,支持crawl.js路径自定义,因为当其他项目依赖此jar包时,runtime.exec()执行phantomjs命令时无使用法jar包中的crawl.js *
-     * crawl.js start -->>
+     * crawl.js start --
      * 
      *   var system = require('system');
      *   var url = system.args[1];
@@ -69,7 +68,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
      *       phantom.exit();
      *   });
      *   
-     * <<-- crawl.js end
+     * -- crawl.js end
      * 
* 具体项目时可以将以上js代码复制下来使用 * @@ -78,7 +77,6 @@ public class PhantomJSDownloader extends AbstractDownloader { * * @param phantomJsCommand phantomJsCommand * @param crawlJsPath crawlJsPath - * @return this */ public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) { PhantomJSDownloader.phantomJsCommand = phantomJsCommand; diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml index bc0595d..8ac4d5b 100644 --- a/webmagic-samples/pom.xml +++ b/webmagic-samples/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml index fbac822..bfbe4e3 100644 --- a/webmagic-saxon/pom.xml +++ b/webmagic-saxon/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml index 0b334c8..6095a57 100755 --- a/webmagic-scripts/pom.xml +++ b/webmagic-scripts/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index ccd07e3..fdd5dfb 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1 + 0.6.1-SNAPSHOT 4.0.0 From aaccc932155e106df7b8bcbfd81a60c190abcbc6 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 12:04:12 +0800 Subject: [PATCH 09/10] new version --- pom.xml | 2 +- webmagic-core/pom.xml | 2 +- webmagic-extension/pom.xml | 2 +- webmagic-samples/pom.xml | 2 +- webmagic-saxon/pom.xml | 2 +- webmagic-scripts/pom.xml | 2 +- webmagic-selenium/pom.xml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index e5a40b8..0743c02 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 7 us.codecraft - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 pom diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index d789539..fbd5034 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml index 76ee2d9..a48bdd0 100644 --- a/webmagic-extension/pom.xml +++ b/webmagic-extension/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml index 8ac4d5b..eed2b77 100644 --- a/webmagic-samples/pom.xml +++ b/webmagic-samples/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml index bfbe4e3..9b8b732 100644 --- a/webmagic-saxon/pom.xml +++ b/webmagic-saxon/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml index 6095a57..3c6f673 100755 --- a/webmagic-scripts/pom.xml +++ b/webmagic-scripts/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index fdd5dfb..6ddc61c 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.6.1-SNAPSHOT + 0.6.2-SNAPSHOT 4.0.0 From 76076e51d8a1f4c16847943dddcb856a7f8b3287 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 21 Jan 2017 22:19:09 +0800 Subject: [PATCH 10/10] update version in readme --- README-zh.md | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README-zh.md b/README-zh.md index 36059a4..d69dd63 100644 --- a/README-zh.md +++ b/README-zh.md @@ -38,12 +38,12 @@ webmagic使用maven管理依赖,在项目中添加对应的依赖即可使用w us.codecraft webmagic-core - 0.6.0 + 0.6.1 us.codecraft webmagic-extension - 0.6.0 + 0.6.1 ``` diff --git a/README.md b/README.md index 0dc200a..285eb60 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,12 @@ Add dependencies to your pom.xml: us.codecraft webmagic-core - 0.6.0 + 0.6.1 us.codecraft webmagic-extension - 0.6.0 + 0.6.1 ```