processor
parent
50edd22ef6
commit
c5cf05640a
|
@ -27,7 +27,7 @@ public class CssSelector implements Selector {
|
||||||
public String select(String text) {
|
public String select(String text) {
|
||||||
Document doc = Jsoup.parse(text);
|
Document doc = Jsoup.parse(text);
|
||||||
Elements elements = doc.select(selectorText);
|
Elements elements = doc.select(selectorText);
|
||||||
if (CollectionUtils.isNotEmpty(elements)) {
|
if (CollectionUtils.isEmpty(elements)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return elements.get(0).outerHtml();
|
return elements.get(0).outerHtml();
|
||||||
|
|
|
@ -5,13 +5,13 @@ package us.codecraft.webmagic.annotation;
|
||||||
* @date: 13-8-1 <br>
|
* @date: 13-8-1 <br>
|
||||||
* Time: 下午10:18 <br>
|
* Time: 下午10:18 <br>
|
||||||
*/
|
*/
|
||||||
@TargetUrl("http://djjchobits.iteye.com/blog/\\d+")
|
@TargetUrl("http://my.oschina.net/flashsword/blog/*")
|
||||||
public class Blog {
|
public class Blog {
|
||||||
|
|
||||||
@Fetcher("//title")
|
@Fetcher("//title")
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
@Fetcher(value = "div#main",type = Fetcher.Type.Css)
|
@Fetcher(value = "div.BlogContent",type = Fetcher.Type.Css)
|
||||||
private String content;
|
private String content;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package us.codecraft.webmagic.annotation;
|
package us.codecraft.webmagic.annotation;
|
||||||
|
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Spider;
|
import us.codecraft.webmagic.Spider;
|
||||||
|
@ -11,9 +12,10 @@ import us.codecraft.webmagic.Spider;
|
||||||
*/
|
*/
|
||||||
public class TestFetcher {
|
public class TestFetcher {
|
||||||
|
|
||||||
|
@Ignore("takes long")
|
||||||
@Test
|
@Test
|
||||||
public void test() {
|
public void test() {
|
||||||
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://djjchobits.iteye.com/blog/569000"), Blog.class)).run();
|
Spider.create(ObjectPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), Blog.class)).run();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue