#27 add timeout config to site
parent
3b00190f99
commit
1a2c84ea78
|
@ -32,6 +32,8 @@ public class Site {
|
||||||
|
|
||||||
private int cycleRetryTimes = 0;
|
private int cycleRetryTimes = 0;
|
||||||
|
|
||||||
|
private int timeOut = 2000;
|
||||||
|
|
||||||
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
|
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
|
||||||
|
|
||||||
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
|
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
|
||||||
|
@ -131,6 +133,18 @@ public class Site {
|
||||||
return charset;
|
return charset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getTimeOut() {
|
||||||
|
return timeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set timeout for downloader in ms
|
||||||
|
* @param timeOut
|
||||||
|
*/
|
||||||
|
public void setTimeOut(int timeOut) {
|
||||||
|
this.timeOut = timeOut;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set acceptStatCode.<br>
|
* Set acceptStatCode.<br>
|
||||||
* When status code of http response is in acceptStatCodes, it will be processed.<br>
|
* When status code of http response is in acceptStatCodes, it will be processed.<br>
|
||||||
|
|
|
@ -52,8 +52,8 @@ public class HttpClientPool {
|
||||||
if (site != null && site.getUserAgent() != null) {
|
if (site != null && site.getUserAgent() != null) {
|
||||||
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
|
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
|
||||||
}
|
}
|
||||||
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000);
|
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, site.getTimeOut());
|
||||||
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000);
|
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, site.getTimeOut());
|
||||||
|
|
||||||
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
||||||
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package us.codecraft.webmagic.example;
|
package us.codecraft.webmagic.example;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.Page;
|
||||||
|
import us.codecraft.webmagic.model.AfterExtractor;
|
||||||
import us.codecraft.webmagic.model.OOSpider;
|
import us.codecraft.webmagic.model.OOSpider;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
import us.codecraft.webmagic.model.annotation.UrlTemplate;
|
import us.codecraft.webmagic.model.annotation.UrlTemplate;
|
||||||
|
@ -12,17 +14,21 @@ import java.util.List;
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
*/
|
*/
|
||||||
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
|
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
|
||||||
public class BaiduBaike {
|
public class BaiduBaike implements AfterExtractor{
|
||||||
|
|
||||||
private String word;
|
private String word;
|
||||||
|
|
||||||
@ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()")
|
@ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()")
|
||||||
private String description;
|
private String description;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void afterProcess(Page page) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
List<Param> words = new ArrayList<Param>();
|
List<Param> words = new ArrayList<Param>();
|
||||||
words.add(new Param().put("word","红烧肉"));
|
words.add(new Param().put("word","红烧肉"));
|
||||||
OOSpider.direct(words, BaiduBaike.class).thread(10).run();
|
OOSpider.direct(words, BaiduBaike.class).thread(10).run();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,7 +89,7 @@ public class OOSpider extends Spider {
|
||||||
return new OOSpider(null, null, pageModels);
|
return new OOSpider(null, null, pageModels);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OOSpider direct(Collection<Param> params,Class... pageModels) {
|
public static OOSpider direct(Collection<Param> params, Class... pageModels) {
|
||||||
return new OOSpider(null, null, pageModels);
|
return new OOSpider(null, null, pageModels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue