processor
parent
50edd22ef6
commit
c5cf05640a
|
@ -27,7 +27,7 @@ public class CssSelector implements Selector {
|
|||
public String select(String text) {
|
||||
Document doc = Jsoup.parse(text);
|
||||
Elements elements = doc.select(selectorText);
|
||||
if (CollectionUtils.isNotEmpty(elements)) {
|
||||
if (CollectionUtils.isEmpty(elements)) {
|
||||
return null;
|
||||
}
|
||||
return elements.get(0).outerHtml();
|
||||
|
|
|
@ -5,13 +5,13 @@ package us.codecraft.webmagic.annotation;
|
|||
* @date: 13-8-1 <br>
|
||||
* Time: 下午10:18 <br>
|
||||
*/
|
||||
@TargetUrl("http://djjchobits.iteye.com/blog/\\d+")
|
||||
@TargetUrl("http://my.oschina.net/flashsword/blog/*")
|
||||
public class Blog {
|
||||
|
||||
@Fetcher("//title")
|
||||
private String title;
|
||||
|
||||
@Fetcher(value = "div#main",type = Fetcher.Type.Css)
|
||||
@Fetcher(value = "div.BlogContent",type = Fetcher.Type.Css)
|
||||
private String content;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package us.codecraft.webmagic.annotation;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
|
@ -11,9 +12,10 @@ import us.codecraft.webmagic.Spider;
|
|||
*/
|
||||
public class TestFetcher {
|
||||
|
||||
@Ignore("takes long")
|
||||
@Test
|
||||
public void test() {
|
||||
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://djjchobits.iteye.com/blog/569000"), Blog.class)).run();
|
||||
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), Blog.class)).run();
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue