#27 add timeout config to site

master
yihua.huang 2013-10-11 07:36:16 +08:00
parent 3b00190f99
commit 1a2c84ea78
4 changed files with 25 additions and 5 deletions

View File

@ -32,6 +32,8 @@ public class Site {
private int cycleRetryTimes = 0;
// Timeout for the downloader in ms; defaults to 2000.
private int timeOut = 2000;
// Shared default for acceptStatCode; created empty here.
// NOTE(review): where this set gets populated is not visible in this chunk.
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
// Starts out referencing the shared static default set itself, not a copy.
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
@ -131,6 +133,18 @@ public class Site {
return charset;
}
/**
 * Get the timeout configured for the downloader.
 *
 * @return timeout for downloader in ms
 */
public int getTimeOut() {
    return this.timeOut;
}
/**
* Set the timeout for the downloader, in ms.<br>
* The value is stored as given, without validation, and is what {@link #getTimeOut()} returns.
*
* @param timeOut timeout for downloader in ms
*/
public void setTimeOut(int timeOut) {
this.timeOut = timeOut;
}
/**
* Set acceptStatCode.<br>
* When status code of http response is in acceptStatCodes, it will be processed.<br>

View File

@ -52,8 +52,8 @@ public class HttpClientPool {
if (site != null && site.getUserAgent() != null) {
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
}
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000);
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000);
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, site.getTimeOut());
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, site.getTimeOut());
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
paramsBean.setVersion(HttpVersion.HTTP_1_1);

View File

@ -1,5 +1,7 @@
package us.codecraft.webmagic.example;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.UrlTemplate;
@ -12,17 +14,21 @@ import java.util.List;
* @author code4crafter@gmail.com
*/
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
public class BaiduBaike {
public class BaiduBaike implements AfterExtractor{
private String word;
@ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()")
private String description;
/**
* Callback overridden from {@code AfterExtractor}.<br>
* The body is empty, so nothing is done with the page here.
* NOTE(review): presumably invoked once the annotated fields have been extracted; confirm against AfterExtractor.
*
* @param page the page handed to this callback
*/
@Override
public void afterProcess(Page page) {
// no-op
}
/**
 * Example entry point: builds a single parameter set for the {@code word}
 * placeholder and runs an OOSpider over it with 10 threads.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
    // One Param per value to substitute for "word".
    Param keyword = new Param().put("word","红烧肉");

    List<Param> searchParams = new ArrayList<Param>();
    searchParams.add(keyword);

    OOSpider.direct(searchParams, BaiduBaike.class).thread(10).run();
}
}