us.codecraft
webmagic-core
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExpressionType.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExpressionType.java
index bd84be3..d873e65 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExpressionType.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExpressionType.java
@@ -2,7 +2,6 @@ package us.codecraft.webmagic.configurable;
/**
* @author code4crafter@gmail.com
- * @date 14-4-5
*/
public enum ExpressionType {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExtractRule.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExtractRule.java
index 82337c4..bbc48dd 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExtractRule.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ExtractRule.java
@@ -7,7 +7,6 @@ import static us.codecraft.webmagic.selector.Selectors.*;
/**
* @author code4crafter@gmail.com
- * @date 14-4-5
*/
public class ExtractRule {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
index 2292788..88fa7c0 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
@@ -37,7 +37,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
* phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
* /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException
*
- * @param phantomJsCommand
+ * @param phantomJsCommand the phantomjs command to execute, e.g. the absolute path /usr/local/bin/phantomjs
*/
public PhantomJSDownloader(String phantomJsCommand) {
this.initPhantomjsCrawlPath();
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/CompositePageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/CompositePageProcessor.java
index 2073445..b7a39ed 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/CompositePageProcessor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/CompositePageProcessor.java
@@ -9,7 +9,6 @@ import java.util.List;
/**
* @author code4crafter@gmail.com
- * @date 14-4-5
*/
public class CompositePageProcessor implements PageProcessor {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/SubPageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/SubPageProcessor.java
index 12f62df..f7baad7 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/SubPageProcessor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/handler/SubPageProcessor.java
@@ -4,7 +4,6 @@ import us.codecraft.webmagic.Page;
/**
* @author code4crafter@gmail.com
- * @date 14-4-5
*/
public interface SubPageProcessor extends RequestMatcher {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
index cae0d1d..a8aaecf 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
@@ -45,6 +45,7 @@ public class SpiderMonitor {
*
* @param spiders spiders
* @return this
+ * @throws JMException if a JMX error occurs while registering the spiders
*/
public synchronized SpiderMonitor register(Spider... spiders) throws JMException {
for (Spider spider : spiders) {
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component/BloomFilterDuplicateRemover.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemover.java
similarity index 90%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component/BloomFilterDuplicateRemover.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemover.java
index 6d5e597..db84302 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component/BloomFilterDuplicateRemover.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemover.java
@@ -1,9 +1,16 @@
-package us.codecraft.webmagic.scheduler.component;
+package us.codecraft.webmagic.scheduler;
+
+/**
+ * @author code4crafter@gmail.com
+ * Date: 16/12/18
+ * Time: 上午10:23
+ */
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import java.nio.charset.Charset;
import java.util.concurrent.atomic.AtomicInteger;
@@ -67,4 +74,4 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
public int getTotalRequestsCount(Task task) {
return counter.get();
}
-}
+}
\ No newline at end of file
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java
similarity index 97%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java
index f8e0b9a..39c2b6a 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java
@@ -3,7 +3,6 @@ package us.codecraft.webmagic.scheduler;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Request;
-import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/BaiduNews.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/BaiduNews.java
index 4795662..e83d944 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/BaiduNews.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/BaiduNews.java
@@ -6,7 +6,6 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
/**
* @author code4crafter@gmail.com
- * @date 14-4-9
*/
public class BaiduNews {
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/QQMeishi.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/QQMeishi.java
index f4f8591..8120e35 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/QQMeishi.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/QQMeishi.java
@@ -8,7 +8,6 @@ import us.codecraft.webmagic.model.annotation.TargetUrl;
/**
* @author code4crafter@gmail.com
- * @date 14-4-11
*/
@TargetUrl("http://meishi.qq.com/beijing/c/all[\\-p2]*")
@ExtractBy(value = "//ul[@id=\"promos_list2\"]/li",multi = true)
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/OschinaBlogPageProcesser.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/OschinaBlogPageProcesser.java
index 2e3996c..e6db04e 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/OschinaBlogPageProcesser.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/OschinaBlogPageProcesser.java
@@ -5,8 +5,8 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
-import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
import javax.management.JMException;
import java.util.List;
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/PhantomJSPageProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/PhantomJSPageProcessor.java
index b4f1936..99d5fa8 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/PhantomJSPageProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/PhantomJSPageProcessor.java
@@ -13,7 +13,7 @@ import java.util.List;
/**
* Created by dolphineor on 2014-11-21.
- *
+ *
* 以淘宝为例, 搜索冬装的相关结果
*/
public class PhantomJSPageProcessor implements PageProcessor {
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/pipeline/OneFilePipeline.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/pipeline/OneFilePipeline.java
index 9cb1bc2..4f38ecb 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/pipeline/OneFilePipeline.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/pipeline/OneFilePipeline.java
@@ -19,9 +19,6 @@ public class OneFilePipeline extends FilePersistentBase implements Pipeline {
private PrintWriter printWriter;
- /**
- * create a FilePipeline with default path"/data/webmagic/"
- */
public OneFilePipeline() throws FileNotFoundException, UnsupportedEncodingException {
this("/data/webmagic/");
}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
index 57a923e..0423e58 100755
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.scripts;
-import com.google.common.collect.Sets;
import org.apache.commons.cli.*;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
@@ -8,6 +7,7 @@ import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
+import us.codecraft.webmagic.utils.WMCollections;
import java.util.HashMap;
import java.util.List;
@@ -29,8 +29,8 @@ public class ScriptConsole {
private static Map<Language, Set<String>> alias = new HashMap<Language, Set<String>>();
static {
- alias.put(Language.JavaScript, Sets.newHashSet("js", "javascript", "JavaScript", "JS"));
- alias.put(Language.JRuby, Sets.newHashSet("ruby", "jruby", "Ruby", "JRuby"));
+ alias.put(Language.JavaScript, WMCollections.newHashSet("js", "javascript", "JavaScript", "JS"));
+ alias.put(Language.JRuby, WMCollections.newHashSet("ruby", "jruby", "Ruby", "JRuby"));
}
public void setLanguagefromArg(String arg) {
@@ -93,7 +93,7 @@ public class ScriptConsole {
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
pageProcessor.getSite().setSleepTime(params.getSleepTime());
pageProcessor.getSite().setRetryTimes(3);
- pageProcessor.getSite().setAcceptStatCode(Sets.newHashSet(200, 404,403, 500,502));
+ pageProcessor.getSite().setAcceptStatCode(WMCollections.newHashSet(200, 404,403, 500,502));
Spider spider = Spider.create(pageProcessor).thread(params.getThread());
spider.clearPipeline().addPipeline(new Pipeline() {
@Override