diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java
index 1746048..a8ed995 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java
@@ -14,4 +14,6 @@ import java.lang.annotation.Target;
public @interface HelpUrl {
String[] value();
+ /** Optional attribute, empty string by default. Presumably names the page region that help URLs are taken from - TODO confirm. */
+ String sourceRegion() default "";
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java
index 7bbb962..77b5a82 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java
@@ -15,4 +15,6 @@ public @interface TargetUrl {
String[] value();
+ /** Optional attribute, empty string by default. Presumably names the page region that target URLs are taken from - TODO confirm. */
+ String sourceRegion() default "";
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java
new file mode 100644
index 0000000..98481ef
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java
@@ -0,0 +1,76 @@
+package us.codecraft.webmagic.selector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Selector that chains its delegates: each selector is applied to the output of the
+ * previous one, so a result is produced only when every stage yields one.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-3
+ * Time: 5:29 PM
+ */
+public class AndSelector implements Selector {
+
+    /** Delegates, in the order they are applied. */
+    private final List<Selector> selectors = new ArrayList<Selector>();
+
+    /**
+     * @param selectors delegates to chain, applied in the given order
+     */
+    public AndSelector(Selector... selectors) {
+        for (Selector selector : selectors) {
+            this.selectors.add(selector);
+        }
+    }
+
+    /**
+     * Feeds {@code text} through every delegate in order.
+     *
+     * @param text input text
+     * @return the last delegate's output; {@code null} as soon as the input or an
+     *         intermediate result is {@code null}; {@code text} itself if there are no delegates
+     */
+    @Override
+    public String select(String text) {
+        for (Selector selector : selectors) {
+            if (text == null) {
+                return null;
+            }
+            text = selector.select(text);
+        }
+        return text;
+    }
+
+    /**
+     * Applies the first delegate to {@code text}, then each further delegate to every
+     * result of the stage before it, concatenating what they return.
+     *
+     * @param text input text
+     * @return results of the final stage; an empty list if any stage yields nothing or
+     *         if there are no delegates (never {@code null})
+     */
+    @Override
+    public List<String> selectList(String text) {
+        List<String> results = new ArrayList<String>();
+        List<String> inputs = new ArrayList<String>();
+        inputs.add(text);
+        for (Selector selector : selectors) {
+            results = new ArrayList<String>();
+            for (String input : inputs) {
+                List<String> selected = selector.selectList(input);
+                // Guard against a delegate returning null instead of an empty list.
+                if (selected != null) {
+                    results.addAll(selected);
+                }
+            }
+            if (results.isEmpty()) {
+                // Nothing left to refine, so later delegates cannot produce anything.
+                return results;
+            }
+            inputs = results;
+        }
+        return results;
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
new file mode 100644
index 0000000..2cdd870
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
@@ -0,0 +1,64 @@
+package us.codecraft.webmagic.selector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Selector that offers the same input to each delegate as an alternative.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-3
+ * Time: 5:29 PM
+ */
+public class OrSelector implements Selector {
+
+    /** Delegates, in the order they are tried. */
+    private final List<Selector> selectors = new ArrayList<Selector>();
+
+    /**
+     * @param selectors alternatives, tried in the given order
+     */
+    public OrSelector(Selector... selectors) {
+        for (Selector selector : selectors) {
+            this.selectors.add(selector);
+        }
+    }
+
+    /**
+     * Returns the first non-null result among the delegates.
+     *
+     * @param text input text, passed unchanged to every delegate
+     * @return the first non-null result, or {@code null} if no delegate produces one
+     */
+    @Override
+    public String select(String text) {
+        for (Selector selector : selectors) {
+            // Keep the result in its own variable: overwriting text would hand null
+            // to the next delegate after a miss.
+            String result = selector.select(text);
+            if (result != null) {
+                return result;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Concatenates the lists produced by all delegates for {@code text}.
+     *
+     * @param text input text, passed unchanged to every delegate
+     * @return all results in delegate order; empty if nothing matched (never {@code null})
+     */
+    @Override
+    public List<String> selectList(String text) {
+        List<String> results = new ArrayList<String>();
+        for (Selector selector : selectors) {
+            List<String> strings = selector.selectList(text);
+            // Guard against a delegate returning null instead of an empty list.
+            if (strings != null) {
+                results.addAll(strings);
+            }
+        }
+        return results;
+    }
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
index c82ef23..85d4817 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
@@ -10,7 +10,7 @@ import java.util.List;
* Time: 下午10:18
*/
@TargetUrl("http://my.oschina.net/flashsword/blog/*")
-public class OschinaBlog implements AfterExtractor{
+public class OschinaBlog implements AfterExtractor {
@ExtractBy("//title")
private String title;
@@ -23,5 +23,6 @@ public class OschinaBlog implements AfterExtractor{
@Override
public void afterProcess(Page page, OschinaBlog oschinaBlog) {
+ content = null; // resets content to null; NOTE(review): presumably a field of this class filled by extraction - confirm why it is discarded
}
}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java
index f4525f0..289cd4f 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java
@@ -4,9 +4,6 @@ import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Site;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* @author yihua.huang@dianping.com
* @date: 13-8-1
@@ -17,7 +14,6 @@ public class TestFetcher {
@Ignore("takes long")
@Test
public void test() {
- System.out.println(List.class.isAssignableFrom(ArrayList.class));
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
.run();