create()
.register("http", PlainConnectionSocketFactory.INSTANCE)
- .register("https", SSLConnectionSocketFactory.getSocketFactory())
+ .register("https", buildSSLConnectionSocketFactory())
.build();
connectionManager = new PoolingHttpClientConnectionManager(reg);
connectionManager.setDefaultMaxPerRoute(100);
}
+ private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() { // builds the socket factory registered for "https"
+ try {
+ return new SSLConnectionSocketFactory(createIgnoreVerifySSL()); // prefer the context that skips certificate verification (NOTE(review): this removes protection against man-in-the-middle attacks)
+ } catch (KeyManagementException e) {
+ logger.error("ssl connection fail", e);
+ } catch (NoSuchAlgorithmException e) {
+ logger.error("ssl connection fail", e);
+ }
+ return SSLConnectionSocketFactory.getSocketFactory(); // context creation failed (logged above): fall back to HttpClient's default socket factory
+ }
+
+ private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
+ // Builds an SSLContext whose only trust manager accepts every certificate chain, i.e. certificate verification is disabled.
+ X509TrustManager trustManager = new X509TrustManager() {
+ // Intentionally empty: client certificate chains are never rejected.
+ @Override
+ public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ }
+ // Intentionally empty: server certificate chains are never rejected.
+ @Override
+ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ }
+ // The X509TrustManager contract requires a non-null (possibly empty) array, so never return null here.
+ @Override
+ public X509Certificate[] getAcceptedIssuers() {
+ return new X509Certificate[0];
+ }
+
+ };
+ // Request "TLS" rather than "SSLv3": SSLv3 is obsolete and disabled by default in current JDKs.
+ SSLContext sc = SSLContext.getInstance("TLS");
+ sc.init(null, new TrustManager[] { trustManager }, null);
+ return sc;
+ }
+
public HttpClientGenerator setPoolSize(int poolSize) {
connectionManager.setMaxTotal(poolSize);
return this;
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 76ee2d9..a48bdd0 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.6.1-SNAPSHOT
+ 0.6.2-SNAPSHOT
4.0.0
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
index 88fa7c0..fd0cc47 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
@@ -29,21 +29,60 @@ public class PhantomJSDownloader extends AbstractDownloader {
public PhantomJSDownloader() {
this.initPhantomjsCrawlPath();
}
+
/**
- * 添加新的构造函数,支持phantomjs自定义命令
- *
- * example:
- * phantomjs.exe 支持windows环境
- * phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
- * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException
- *
- * @param phantomJsCommand phantomJsCommand
- */
+ * 添加新的构造函数,支持phantomjs自定义命令
+ *
+ * example:
+ * phantomjs.exe 支持windows环境
+ * phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
+ * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException
+ *
+ * @param phantomJsCommand phantomJsCommand
+ */
public PhantomJSDownloader(String phantomJsCommand) {
this.initPhantomjsCrawlPath();
PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
}
+ /**
+ * Constructor that additionally accepts a custom crawl.js path. When another project depends on this jar, the phantomjs command run through runtime.exec() cannot use the crawl.js packaged inside the jar.
+ *
+ * crawl.js start --
+ *
+ * var system = require('system');
+ * var url = system.args[1];
+ *
+ * var page = require('webpage').create();
+ * page.settings.loadImages = false;
+ * page.settings.resourceTimeout = 5000;
+ *
+ * page.open(url, function (status) {
+ * if (status != 'success') {
+ * console.log("HTTP request failed!");
+ * } else {
+ * console.log(page.content);
+ * }
+ *
+ * page.close();
+ * phantom.exit();
+ * });
+ *
+ * -- crawl.js end
+ *
+ * The js code above can be copied into your own project and used as crawl.js.
+ *
+ * example:
+ * new PhantomJSDownloader("/your/path/phantomjs", "/your/path/crawl.js");
+ *
+ * @param phantomJsCommand the phantomjs command, e.g. /your/path/phantomjs
+ * @param crawlJsPath path of the crawl.js script passed to phantomjs
+ */
+ public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) {
+ PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
+ PhantomJSDownloader.crawlJsPath = crawlJsPath;
+ }
+
private void initPhantomjsCrawlPath() {
PhantomJSDownloader.crawlJsPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js ";
}
@@ -86,7 +125,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
try {
String url = request.getUrl();
Runtime runtime = Runtime.getRuntime();
- Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url);
+ Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + " " + url);
InputStream is = process.getInputStream();
BufferedReader br = new BufferedReader(new InputStreamReader(is));
StringBuffer stringBuffer = new StringBuffer();
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
index a8aaecf..cfb4a82 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
@@ -45,7 +45,7 @@ public class SpiderMonitor {
*
* @param spiders spiders
* @return this
- * @throws JMException
+ * @throws JMException JMException
*/
public synchronized SpiderMonitor register(Spider... spiders) throws JMException {
for (Spider spider : spiders) {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
index 9e83b6d..7695c66 100755
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
@@ -102,7 +102,7 @@ public class DoubleKeyMap extends MultiKeyMapBase {
/**
* @param key1 key1
- * @return
+ * @return map
*/
public Map remove(K1 key1) {
Map remove = map.remove(key1);
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 8ac4d5b..eed2b77 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.6.1-SNAPSHOT
+ 0.6.2-SNAPSHOT
4.0.0
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index bfbe4e3..9b8b732 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.6.1-SNAPSHOT
+ 0.6.2-SNAPSHOT
4.0.0
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 6095a57..3c6f673 100755
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.6.1-SNAPSHOT
+ 0.6.2-SNAPSHOT
4.0.0
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index fdd5dfb..6ddc61c 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.6.1-SNAPSHOT
+ 0.6.2-SNAPSHOT
4.0.0