processor

master
yihua.huang 2013-08-01 22:53:44 +08:00
parent 50edd22ef6
commit c5cf05640a
3 changed files with 6 additions and 4 deletions

View File

@ -27,7 +27,7 @@ public class CssSelector implements Selector {
public String select(String text) { public String select(String text) {
Document doc = Jsoup.parse(text); Document doc = Jsoup.parse(text);
Elements elements = doc.select(selectorText); Elements elements = doc.select(selectorText);
if (CollectionUtils.isNotEmpty(elements)) { if (CollectionUtils.isEmpty(elements)) {
return null; return null;
} }
return elements.get(0).outerHtml(); return elements.get(0).outerHtml();

View File

@ -5,13 +5,13 @@ package us.codecraft.webmagic.annotation;
* @date: 13-8-1 <br> * @date: 13-8-1 <br>
* Time: 10:18 <br> * Time: 10:18 <br>
*/ */
@TargetUrl("http://djjchobits.iteye.com/blog/\\d+") @TargetUrl("http://my.oschina.net/flashsword/blog/*")
public class Blog { public class Blog {
@Fetcher("//title") @Fetcher("//title")
private String title; private String title;
@Fetcher(value = "div#main",type = Fetcher.Type.Css) @Fetcher(value = "div.BlogContent",type = Fetcher.Type.Css)
private String content; private String content;
@Override @Override

View File

@ -1,5 +1,6 @@
package us.codecraft.webmagic.annotation; package us.codecraft.webmagic.annotation;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
@ -11,9 +12,10 @@ import us.codecraft.webmagic.Spider;
*/ */
public class TestFetcher { public class TestFetcher {
@Ignore("takes long")
@Test @Test
public void test() { public void test() {
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://djjchobits.iteye.com/blog/569000"), Blog.class)).run(); Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), Blog.class)).run();
} }