diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java index 4c7b992..0817335 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java @@ -32,6 +32,8 @@ public class Site { private int cycleRetryTimes = 0; + private int timeOut = 2000; + private static final Set DEFAULT_STATUS_CODE_SET = new HashSet(); private Set acceptStatCode = DEFAULT_STATUS_CODE_SET; @@ -131,6 +133,18 @@ public class Site { return charset; } + public int getTimeOut() { + return timeOut; + } + + /** + * set timeout for downloader in ms + * @param timeOut + */ + public void setTimeOut(int timeOut) { + this.timeOut = timeOut; + } + /** * Set acceptStatCode.
* When status code of http response is in acceptStatCodes, it will be processed.
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java index f2fffad..52e2f99 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java @@ -52,8 +52,8 @@ public class HttpClientPool { if (site != null && site.getUserAgent() != null) { params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent()); } - params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000); - params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000); + params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, site.getTimeOut()); + params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, site.getTimeOut()); HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params); paramsBean.setVersion(HttpVersion.HTTP_1_1); diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java index becc311..b82b8aa 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java @@ -1,5 +1,7 @@ package us.codecraft.webmagic.example; +import us.codecraft.webmagic.Page; +import us.codecraft.webmagic.model.AfterExtractor; import us.codecraft.webmagic.model.OOSpider; import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.UrlTemplate; @@ -12,17 +14,21 @@ import java.util.List; * @author code4crafter@gmail.com */ @UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8") -public class BaiduBaike { +public class BaiduBaike implements AfterExtractor{ private String word; @ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()") private String description; + @Override + public void afterProcess(Page page) { + + } + public static void main(String[] args) { List words = new ArrayList(); words.add(new Param().put("word","红烧肉")); OOSpider.direct(words, BaiduBaike.class).thread(10).run(); } - } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java index efa5faf..a64ca29 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java @@ -89,7 +89,7 @@ public class OOSpider extends Spider { return new OOSpider(null, null, pageModels); } - public static OOSpider direct(Collection params,Class... pageModels) { + public static OOSpider direct(Collection params, Class... pageModels) { return new OOSpider(null, null, pageModels); }