diff --git a/pom.xml b/pom.xml
index a7eb02b..8ba03ce 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
7
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
pom
@@ -76,6 +76,16 @@
guava
15.0
+
+ org.slf4j
+ slf4j-api
+ 1.7.6
+
+
+ org.slf4j
+ slf4j-log4j12
+ 1.7.6
+
us.codecraft
xsoup
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index 914bfda..e64b865 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
@@ -23,7 +23,6 @@
com.google.guava
guava
- 15.0
@@ -37,8 +36,13 @@
- log4j
- log4j
+ org.slf4j
+ slf4j-api
+
+
+
+ org.slf4j
+ slf4j-log4j12
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
index a6b8dac..6a6b956 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
@@ -2,7 +2,8 @@ package us.codecraft.webmagic;
import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
-import org.apache.log4j.Logger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
@@ -18,7 +19,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@@ -72,7 +76,7 @@ public class Spider implements Runnable, Task {
protected Scheduler scheduler = new QueueScheduler();
- protected Logger logger = Logger.getLogger(getClass());
+ protected Logger logger = LoggerFactory.getLogger(getClass());
protected ExecutorService executorService;
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index c11f0f1..cd8c12f 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/Inject.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/Inject.java
new file mode 100644
index 0000000..c6608ae
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/Inject.java
@@ -0,0 +1,25 @@
+package us.codecraft.webmagic.configurable;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Field-level annotation that carries a single optional string.
+ * <p>
+ * It is retained at runtime, so it can be read reflectively from annotated
+ * fields. Presumably {@link #value()} names the configuration entry to inject
+ * into the field; the code that consumes it is not part of this change.
+ *
+ * @author yihua.huang@dianping.com
+ */
+@Target(ElementType.FIELD)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface Inject {
+
+    /**
+     * The string attached to the annotated field; empty when not specified.
+     */
+    String value() default "";
+}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/ConfigurableBlogPageProcesser.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/ConfigurableBlogPageProcesser.java
new file mode 100644
index 0000000..f5992a4
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/ConfigurableBlogPageProcesser.java
@@ -0,0 +1,71 @@
+package us.codecraft.webmagic.example;
+
+import java.util.List;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.configurable.Inject;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+/**
+ * Example {@link PageProcessor} for blog pages on my.oschina.net whose link
+ * pattern and XPath expressions are held in {@link Inject}-annotated fields
+ * instead of being hard-coded.
+ *
+ * @author code4crafter@gmail.com
+ */
+public class ConfigurableBlogPageProcesser implements PageProcessor {
+
+    private final Site site = Site.me().setDomain("my.oschina.net");
+
+    /** Regular expression applied to the page's links to pick the URLs to follow. */
+    @Inject("linkRegex")
+    private String linkRegex;
+
+    /** XPath used to extract the "title" field. */
+    @Inject("titleXpath")
+    private String titleXpath;
+
+    // NOTE(review): never read in this class; "content" is taken from smartContent() instead - confirm intent.
+    @Inject("contentXpath")
+    private String contentXpath;
+
+    /** XPath used to extract the "tags" field. */
+    @Inject("tagsXpath")
+    private String tagsXpath;
+
+    /**
+     * Adds the links matching {@code linkRegex} as target requests and stores
+     * the "title", "content" and "tags" fields; marks the page as skipped
+     * when no title was extracted.
+     *
+     * @param page the page to extract links and fields from
+     */
+    @Override
+    public void process(Page page) {
+        List<String> links = page.getHtml().links().regex(linkRegex).all();
+        page.addTargetRequests(links);
+        page.putField("title", page.getHtml().xpath(titleXpath).toString());
+        if (page.getResultItems().get("title") == null) {
+            // skip this page; the remaining fields are still recorded below
+            page.setSkip(true);
+        }
+        page.putField("content", page.getHtml().smartContent().toString());
+        page.putField("tags", page.getHtml().xpath(tagsXpath).all());
+    }
+
+    @Override
+    public Site getSite() {
+        return site;
+    }
+
+    /**
+     * Runs a spider over http://my.oschina.net/flashsword/blog, configured with thread(2).
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // NOTE(review): nothing in this class populates the @Inject fields, so they are null here
+        // unless set elsewhere - confirm an injector runs before process() is called.
+        Spider.create(new ConfigurableBlogPageProcesser()).addUrl("http://my.oschina.net/flashsword/blog").thread(2).run();
+    }
+}
diff --git a/webmagic-lucene/pom.xml b/webmagic-lucene/pom.xml
index 5502ca3..d8b8bc9 100644
--- a/webmagic-lucene/pom.xml
+++ b/webmagic-lucene/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-panel/pom.xml b/webmagic-panel/pom.xml
index 98f9fb3..3b0b682 100644
--- a/webmagic-panel/pom.xml
+++ b/webmagic-panel/pom.xml
@@ -5,19 +5,18 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
us.codecraft
webmagic-panel
- 0.4.3-SNAPSHOT
us.codecraft
webmagic-scripts
- 0.4.3-SNAPSHOT
+ ${project.version}
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 58f79de..c2b4b93 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index d17335e..94bb1d0 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 1c65513..5c21160 100644
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index fb33895..d73417b 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
diff --git a/webmagic-worker/pom.xml b/webmagic-worker/pom.xml
index f3bddf8..f1059db 100644
--- a/webmagic-worker/pom.xml
+++ b/webmagic-worker/pom.xml
@@ -5,20 +5,19 @@
webmagic-parent
us.codecraft
- 0.4.3-SNAPSHOT
+ 0.5.0-SNAPSHOT
4.0.0
us.codecraft
webmagic-worker
- 0.4.3-SNAPSHOT
war
us.codecraft
webmagic-scripts
- 0.4.3-SNAPSHOT
+ ${project.version}