diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java index 1746048..a8ed995 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java @@ -14,4 +14,6 @@ import java.lang.annotation.Target; public @interface HelpUrl { String[] value(); + + String sourceRegion() default ""; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java index 7bbb962..77b5a82 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java @@ -15,4 +15,6 @@ public @interface TargetUrl { String[] value(); + String sourceRegion() default ""; + } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java new file mode 100644 index 0000000..98481ef --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java @@ -0,0 +1,53 @@ +package us.codecraft.webmagic.selector; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author yihua.huang@dianping.com
+ * @date: 13-8-3
+ * Time: 5:29 PM
+ */
+public class AndSelector implements Selector {
+
+    // Selectors applied in constructor order; each one consumes the output of the previous one.
+    private final List<Selector> selectors = new ArrayList<Selector>();
+
+    /**
+     * Builds a selector chain.
+     *
+     * @param selectors the selectors to apply, first to last
+     */
+    public AndSelector(Selector... selectors) {
+        for (Selector selector : selectors) {
+            this.selectors.add(selector);
+        }
+    }
+
+    /**
+     * Feeds the text through every selector in turn, passing each result to the next selector.
+     *
+     * @param text the input text
+     * @return the output of the last selector, or {@code null} as soon as the input or any
+     *         intermediate result is {@code null}
+     */
+    @Override
+    public String select(String text) {
+        for (Selector selector : selectors) {
+            if (text == null) {
+                return null;
+            }
+            text = selector.select(text);
+        }
+        return text;
+    }
+
+    /**
+     * Applies the first selector to the text, then applies every following selector to each
+     * element produced by the previous stage and collects all matches.
+     *
+     * @param text the input text
+     * @return the matches of the final stage. When the first selector yields {@code null} or an
+     *         empty list, that value is returned unchanged. An empty list is returned when no
+     *         selectors are configured.
+     */
+    @Override
+    public List<String> selectList(String text) {
+        List<String> results = new ArrayList<String>();
+        boolean first = true;
+        for (Selector selector : selectors) {
+            if (first) {
+                results = selector.selectList(text);
+                first = false;
+                continue;
+            }
+            // Guard before iterating: the previous stage may have produced null or nothing,
+            // in which case no later selector can contribute anything.
+            if (results == null || results.isEmpty()) {
+                return results;
+            }
+            List<String> narrowed = new ArrayList<String>();
+            for (String result : results) {
+                List<String> matches = selector.selectList(result);
+                // A selector that reports "no match" as null must not break the chain.
+                if (matches != null) {
+                    narrowed.addAll(matches);
+                }
+            }
+            results = narrowed;
+        }
+        return results;
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
new file mode 100644
index 0000000..2cdd870
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
@@ -0,0 +1,41 @@
+package us.codecraft.webmagic.selector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-3
+ * Time: 5:29 PM
+ */
+public class OrSelector implements Selector {
+
+    // Alternative selectors; every one of them is applied to the same, original input text.
+    private final List<Selector> selectors = new ArrayList<Selector>();
+
+    /**
+     * Builds a set of alternative selectors.
+     *
+     * @param selectors the alternatives, in the order they should be tried
+     */
+    public OrSelector(Selector... selectors) {
+        for (Selector selector : selectors) {
+            this.selectors.add(selector);
+        }
+    }
+
+    /**
+     * Tries each selector against the input text and returns the first non-null result.
+     *
+     * @param text the input text
+     * @return the first non-null selection, or {@code null} when no selector matches
+     */
+    @Override
+    public String select(String text) {
+        for (Selector selector : selectors) {
+            // Keep the result in its own variable: overwriting text with a null miss would
+            // hand null, not the original input, to every remaining alternative.
+            String result = selector.select(text);
+            if (result != null) {
+                return result;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Applies every selector to the input text and concatenates their matches in selector order.
+     *
+     * @param text the input text
+     * @return all matches of all selectors; empty when nothing matches
+     */
+    @Override
+    public List<String> selectList(String text) {
+        List<String> results = new ArrayList<String>();
+        for (Selector selector : selectors) {
+            List<String> strings = selector.selectList(text);
+            // A selector that reports "no match" as null contributes nothing.
+            if (strings != null) {
+                results.addAll(strings);
+            }
+        }
+        return results;
+    }
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
index c82ef23..85d4817 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java
@@ -10,7 +10,7 @@ import java.util.List;
  * Time: 下午10:18
*/ @TargetUrl("http://my.oschina.net/flashsword/blog/*") -public class OschinaBlog implements AfterExtractor{ +public class OschinaBlog implements AfterExtractor { @ExtractBy("//title") private String title; @@ -23,5 +23,6 @@ public class OschinaBlog implements AfterExtractor{ @Override public void afterProcess(Page page, OschinaBlog oschinaBlog) { + content = null; } } diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java index f4525f0..289cd4f 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java @@ -4,9 +4,6 @@ import org.junit.Ignore; import org.junit.Test; import us.codecraft.webmagic.Site; -import java.util.ArrayList; -import java.util.List; - /** * @author yihua.huang@dianping.com
* @date: 13-8-1
@@ -17,7 +14,6 @@ public class TestFetcher { @Ignore("takes long") @Test public void test() { - System.out.println(List.class.isAssignableFrom(ArrayList.class)); OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class) .run();