Add AndSelector and OrSelector (composite selectors)

master
yihua.huang 2013-08-03 17:38:36 +08:00
parent 69245e8c03
commit aca165b132
6 changed files with 100 additions and 5 deletions

View File

@ -14,4 +14,6 @@ import java.lang.annotation.Target;
// Annotation type with a required string-array element and an optional region string.
// NOTE(review): presumably the values are URL patterns for "help" pages that lead to
// target pages — confirm against the code that reads this annotation.
public @interface HelpUrl {
/** The string values supplied to this annotation; required, since no default is declared. */
String[] value();
/**
 * Optional region string; defaults to the empty string when not specified.
 * NOTE(review): presumably a selector expression limiting where links are taken from — confirm.
 */
String sourceRegion() default "";
}

View File

@ -15,4 +15,6 @@ public @interface TargetUrl {
String[] value();
String sourceRegion() default "";
}

View File

@ -0,0 +1,53 @@
package us.codecraft.webmagic.selector;
import java.util.ArrayList;
import java.util.List;
/**
 * Composite selector that chains its component selectors: each selector is applied
 * to the output of the previous one, in the order they were given to the constructor.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-8-3 <br>
 * Time: 5:29 <br>
 */
public class AndSelector implements Selector {

    /** Component selectors, applied in insertion order. */
    private final List<Selector> selectors = new ArrayList<Selector>();

    /**
     * Creates a chain from the given selectors.
     *
     * @param selectors the selectors to apply in order; a {@code null} array is
     *                  treated as an empty chain
     */
    public AndSelector(Selector... selectors) {
        if (selectors == null) {
            return;
        }
        for (Selector selector : selectors) {
            this.selectors.add(selector);
        }
    }

    /**
     * Applies every selector in turn, feeding each one the previous result.
     *
     * @param text the input text
     * @return the result of the last selector, or {@code null} as soon as the input
     *         or any intermediate result is {@code null}; with no selectors the
     *         input is returned unchanged
     */
    @Override
    public String select(String text) {
        for (Selector selector : selectors) {
            if (text == null) {
                return null;
            }
            text = selector.select(text);
        }
        return text;
    }

    /**
     * Applies the first selector to {@code text}, then applies each following
     * selector to every result of the previous stage and flattens the outcome.
     *
     * @param text the input text
     * @return the results of the final stage; never {@code null}. An empty list is
     *         returned when there are no selectors, when {@code text} is
     *         {@code null}, or as soon as any stage yields nothing
     */
    @Override
    public List<String> selectList(String text) {
        List<String> results = new ArrayList<String>();
        // Mirror select(): a null input cannot match anything.
        if (text == null) {
            return results;
        }
        boolean first = true;
        for (Selector selector : selectors) {
            List<String> stage = new ArrayList<String>();
            if (first) {
                appendAll(stage, selector.selectList(text));
                first = false;
            } else {
                for (String result : results) {
                    // A null intermediate value has nothing left to select from.
                    if (result != null) {
                        appendAll(stage, selector.selectList(result));
                    }
                }
            }
            results = stage;
            // Once a stage is empty, later selectors cannot produce anything.
            if (results.isEmpty()) {
                return results;
            }
        }
        return results;
    }

    /** Adds all of {@code source} to {@code target}, treating a null source as empty. */
    private static void appendAll(List<String> target, List<String> source) {
        if (source != null) {
            target.addAll(source);
        }
    }
}

View File

@ -0,0 +1,41 @@
package us.codecraft.webmagic.selector;
import java.util.ArrayList;
import java.util.List;
/**
 * Composite selector that tries its component selectors as alternatives, all
 * against the same original input, in the order they were given to the constructor.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-8-3 <br>
 * Time: 5:29 <br>
 */
public class OrSelector implements Selector {

    /** Component selectors, tried in insertion order. */
    private final List<Selector> selectors = new ArrayList<Selector>();

    /**
     * Creates a set of alternatives from the given selectors.
     *
     * @param selectors the selectors to try in order; a {@code null} array is
     *                  treated as having no alternatives
     */
    public OrSelector(Selector... selectors) {
        if (selectors == null) {
            return;
        }
        for (Selector selector : selectors) {
            this.selectors.add(selector);
        }
    }

    /**
     * Returns the first non-null result produced by any selector.
     *
     * @param text the input text, passed unchanged to every selector
     * @return the first non-null selection, or {@code null} if every selector
     *         returned {@code null} or there are no selectors
     */
    @Override
    public String select(String text) {
        for (Selector selector : selectors) {
            // Keep the result in its own variable: reassigning the parameter would
            // hand later alternatives a null input after the first miss.
            String result = selector.select(text);
            if (result != null) {
                return result;
            }
        }
        return null;
    }

    /**
     * Concatenates the results of every selector applied to {@code text}.
     *
     * @param text the input text, passed unchanged to every selector
     * @return all results in selector order; never {@code null}. A selector that
     *         returns {@code null} contributes nothing
     */
    @Override
    public List<String> selectList(String text) {
        List<String> results = new ArrayList<String>();
        for (Selector selector : selectors) {
            List<String> strings = selector.selectList(text);
            // addAll(null) would throw, so treat a null list as "no matches".
            if (strings != null) {
                results.addAll(strings);
            }
        }
        return results;
    }
}

View File

@ -10,7 +10,7 @@ import java.util.List;
* Time: 10:18 <br>
*/
@TargetUrl("http://my.oschina.net/flashsword/blog/*")
public class OschinaBlog implements AfterExtractor<OschinaBlog>{
public class OschinaBlog implements AfterExtractor<OschinaBlog> {
@ExtractBy("//title")
private String title;
@ -23,5 +23,6 @@ public class OschinaBlog implements AfterExtractor<OschinaBlog>{
/**
 * Hook that resets {@code content} to {@code null}; neither parameter is read.
 * NOTE(review): {@code content} is declared outside the visible hunk, and this is
 * presumably called after field extraction completes — confirm both against the
 * full class and the {@code AfterExtractor} contract.
 *
 * @param page        unused here
 * @param oschinaBlog unused here
 */
@Override
public void afterProcess(Page page, OschinaBlog oschinaBlog) {
content = null;
}
}

View File

@ -4,9 +4,6 @@ import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Site;
import java.util.ArrayList;
import java.util.List;
/**
* @author yihua.huang@dianping.com <br>
* @date: 13-8-1 <br>
@ -17,7 +14,6 @@ public class TestFetcher {
@Ignore("takes long")
@Test
public void test() {
System.out.println(List.class.isAssignableFrom(ArrayList.class));
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
.run();