add and or selector
parent
69245e8c03
commit
aca165b132
|
@ -14,4 +14,6 @@ import java.lang.annotation.Target;
|
|||
public @interface HelpUrl {
|
||||
|
||||
String[] value();
|
||||
|
||||
String sourceRegion() default "";
|
||||
}
|
||||
|
|
|
@ -15,4 +15,6 @@ public @interface TargetUrl {
|
|||
|
||||
String[] value();
|
||||
|
||||
String sourceRegion() default "";
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午5:29 <br>
|
||||
*/
|
||||
public class AndSelector implements Selector {
|
||||
|
||||
private List<Selector> selectors = new ArrayList<Selector>();
|
||||
|
||||
public AndSelector(Selector... selectors) {
|
||||
for (Selector selector : selectors) {
|
||||
this.selectors.add(selector);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String select(String text) {
|
||||
for (Selector selector : selectors) {
|
||||
if (text == null) {
|
||||
return null;
|
||||
}
|
||||
text = selector.select(text);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(String text) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
boolean first = true;
|
||||
for (Selector selector : selectors) {
|
||||
if (first) {
|
||||
results = selector.selectList(text);
|
||||
first = false;
|
||||
} else {
|
||||
List<String> resultsTemp = new ArrayList<String>();
|
||||
for (String result : results) {
|
||||
resultsTemp.addAll(selector.selectList(result));
|
||||
}
|
||||
results = resultsTemp;
|
||||
if (results == null || results.size() == 0) {
|
||||
return results;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午5:29 <br>
|
||||
*/
|
||||
public class OrSelector implements Selector {
|
||||
|
||||
private List<Selector> selectors = new ArrayList<Selector>();
|
||||
|
||||
public OrSelector(Selector... selectors) {
|
||||
for (Selector selector : selectors) {
|
||||
this.selectors.add(selector);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String select(String text) {
|
||||
for (Selector selector : selectors) {
|
||||
text = selector.select(text);
|
||||
if (text!=null){
|
||||
return text;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(String text) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
for (Selector selector : selectors) {
|
||||
List<String> strings = selector.selectList(text);
|
||||
results.addAll(strings);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
|
@ -10,7 +10,7 @@ import java.util.List;
|
|||
* Time: 下午10:18 <br>
|
||||
*/
|
||||
@TargetUrl("http://my.oschina.net/flashsword/blog/*")
|
||||
public class OschinaBlog implements AfterExtractor<OschinaBlog>{
|
||||
public class OschinaBlog implements AfterExtractor<OschinaBlog> {
|
||||
|
||||
@ExtractBy("//title")
|
||||
private String title;
|
||||
|
@ -23,5 +23,6 @@ public class OschinaBlog implements AfterExtractor<OschinaBlog>{
|
|||
|
||||
@Override
|
||||
public void afterProcess(Page page, OschinaBlog oschinaBlog) {
|
||||
content = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,9 +4,6 @@ import org.junit.Ignore;
|
|||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Site;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-8-1 <br>
|
||||
|
@ -17,7 +14,6 @@ public class TestFetcher {
|
|||
@Ignore("takes long")
|
||||
@Test
|
||||
public void test() {
|
||||
System.out.println(List.class.isAssignableFrom(ArrayList.class));
|
||||
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
|
||||
.run();
|
||||
|
||||
|
|
Loading…
Reference in New Issue