From 1fc8e104ab8277a5bd002c63659dc05d9f6594d8 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Wed, 4 Sep 2013 10:32:13 +0800 Subject: [PATCH] add cycle retry --- .../src/main/java/us/codecraft/webmagic/Spider.java | 6 ++++++ .../us/codecraft/webmagic/samples/F58PageProcesser.java | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 723e805..47cefd0 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -310,6 +310,12 @@ public class Spider implements Runnable, Task { sleep(site.getSleepTime()); return; } + //for cycle retry + if (page.getHtml()==null){ + addRequest(page); + sleep(site.getSleepTime()); + return; + } pageProcessor.process(page); addRequest(page); if (!page.getResultItems().isSkip()) { diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/F58PageProcesser.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/F58PageProcesser.java index 7124a8c..3d27be8 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/F58PageProcesser.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/F58PageProcesser.java @@ -4,6 +4,7 @@ import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; +import us.codecraft.webmagic.scheduler.RedisScheduler; import java.util.List; @@ -24,10 +25,10 @@ public class F58PageProcesser implements PageProcessor { @Override public Site getSite() { - return Site.me().setDomain("sh.58.com").addStartUrl("http://sh.58.com/"); //To change body of implemented methods use File | Settings | File Templates. + return Site.me().setDomain("sh.58.com").addStartUrl("http://sh1.51a8.com/").setCycleRetryTimes(2); //To change body of implemented methods use File | Settings | File Templates. } public static void main(String[] args) { - Spider.create(new F58PageProcesser()).run(); + Spider.create(new F58PageProcesser()).setScheduler(new RedisScheduler("localhost")).run(); } }