#27 add timeout config to site
parent
3b00190f99
commit
1a2c84ea78
|
@ -32,6 +32,8 @@ public class Site {
|
|||
|
||||
private int cycleRetryTimes = 0;
|
||||
|
||||
private int timeOut = 2000;
|
||||
|
||||
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
|
||||
|
||||
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
|
||||
|
@ -131,6 +133,18 @@ public class Site {
|
|||
return charset;
|
||||
}
|
||||
|
||||
public int getTimeOut() {
|
||||
return timeOut;
|
||||
}
|
||||
|
||||
/**
|
||||
* set timeout for downloader in ms
|
||||
* @param timeOut
|
||||
*/
|
||||
public void setTimeOut(int timeOut) {
|
||||
this.timeOut = timeOut;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set acceptStatCode.<br>
|
||||
* When status code of http response is in acceptStatCodes, it will be processed.<br>
|
||||
|
|
|
@ -52,8 +52,8 @@ public class HttpClientPool {
|
|||
if (site != null && site.getUserAgent() != null) {
|
||||
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
|
||||
}
|
||||
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000);
|
||||
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000);
|
||||
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, site.getTimeOut());
|
||||
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, site.getTimeOut());
|
||||
|
||||
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
||||
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package us.codecraft.webmagic.example;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.model.AfterExtractor;
|
||||
import us.codecraft.webmagic.model.OOSpider;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
import us.codecraft.webmagic.model.annotation.UrlTemplate;
|
||||
|
@ -12,17 +14,21 @@ import java.util.List;
|
|||
* @author code4crafter@gmail.com
|
||||
*/
|
||||
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
|
||||
public class BaiduBaike {
|
||||
public class BaiduBaike implements AfterExtractor{
|
||||
|
||||
private String word;
|
||||
|
||||
@ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()")
|
||||
private String description;
|
||||
|
||||
@Override
|
||||
public void afterProcess(Page page) {
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
List<Param> words = new ArrayList<Param>();
|
||||
words.add(new Param().put("word","红烧肉"));
|
||||
OOSpider.direct(words, BaiduBaike.class).thread(10).run();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -89,7 +89,7 @@ public class OOSpider extends Spider {
|
|||
return new OOSpider(null, null, pageModels);
|
||||
}
|
||||
|
||||
public static OOSpider direct(Collection<Param> params,Class... pageModels) {
|
||||
public static OOSpider direct(Collection<Param> params, Class... pageModels) {
|
||||
return new OOSpider(null, null, pageModels);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue