add retry sleep time
parent
8ffc1a7093
commit
4978665633
|
@ -39,6 +39,8 @@ public class Site {
|
||||||
|
|
||||||
private int cycleRetryTimes = 0;
|
private int cycleRetryTimes = 0;
|
||||||
|
|
||||||
|
private int retrySleepTime = 1000;
|
||||||
|
|
||||||
private int timeOut = 5000;
|
private int timeOut = 5000;
|
||||||
|
|
||||||
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
|
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
|
||||||
|
@ -49,8 +51,8 @@ public class Site {
|
||||||
|
|
||||||
private HttpHost httpProxy;
|
private HttpHost httpProxy;
|
||||||
|
|
||||||
private ProxyPool httpProxyPool;
|
private ProxyPool httpProxyPool;
|
||||||
|
|
||||||
private boolean useGzip = true;
|
private boolean useGzip = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -359,6 +361,20 @@ public class Site {
|
||||||
return useGzip;
|
return useGzip;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getRetrySleepTime() {
|
||||||
|
return retrySleepTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the retry sleep time used when a download fails, 1000 by default. <br>
|
||||||
|
*
|
||||||
|
* @param retrySleepTime the sleep time to apply before retrying after a failed download
|
||||||
|
*/
|
||||||
|
public Site setRetrySleepTime(int retrySleepTime) {
|
||||||
|
this.retrySleepTime = retrySleepTime;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether use gzip. <br>
|
* Whether use gzip. <br>
|
||||||
* Default is true, you can set it to false to disable gzip.
|
* Default is true, you can set it to false to disable gzip.
|
||||||
|
@ -448,31 +464,31 @@ public class Site {
|
||||||
*
|
*
|
||||||
* @return this
|
* @return this
|
||||||
*/
|
*/
|
||||||
public Site setHttpProxyPool(List<String[]> httpProxyList) {
|
public Site setHttpProxyPool(List<String[]> httpProxyList) {
|
||||||
this.httpProxyPool=new ProxyPool(httpProxyList);
|
this.httpProxyPool=new ProxyPool(httpProxyList);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Site enableHttpProxyPool() {
|
public Site enableHttpProxyPool() {
|
||||||
this.httpProxyPool=new ProxyPool();
|
this.httpProxyPool=new ProxyPool();
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ProxyPool getHttpProxyPool() {
|
public ProxyPool getHttpProxyPool() {
|
||||||
return httpProxyPool;
|
return httpProxyPool;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HttpHost getHttpProxyFromPool() {
|
public HttpHost getHttpProxyFromPool() {
|
||||||
return httpProxyPool.getProxy();
|
return httpProxyPool.getProxy();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void returnHttpProxyToPool(HttpHost proxy,int statusCode) {
|
public void returnHttpProxyToPool(HttpHost proxy,int statusCode) {
|
||||||
httpProxyPool.returnProxy(proxy,statusCode);
|
httpProxyPool.returnProxy(proxy,statusCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Site setProxyReuseInterval(int reuseInterval) {
|
public Site setProxyReuseInterval(int reuseInterval) {
|
||||||
this.httpProxyPool.setReuseInterval(reuseInterval);
|
this.httpProxyPool.setReuseInterval(reuseInterval);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -407,14 +407,14 @@ public class Spider implements Runnable, Task {
|
||||||
protected void processRequest(Request request) {
|
protected void processRequest(Request request) {
|
||||||
Page page = downloader.download(request, this);
|
Page page = downloader.download(request, this);
|
||||||
if (page == null) {
|
if (page == null) {
|
||||||
sleep(site.getSleepTime());
|
sleep(site.getRetrySleepTime());
|
||||||
onError(request);
|
onError(request);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// for cycle retry
|
// for cycle retry
|
||||||
if (page.isNeedCycleRetry()) {
|
if (page.isNeedCycleRetry()) {
|
||||||
extractAndAddRequests(page, true);
|
extractAndAddRequests(page, true);
|
||||||
sleep(site.getSleepTime());
|
sleep(site.getRetrySleepTime());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
pageProcessor.process(page);
|
pageProcessor.process(page);
|
||||||
|
|
Loading…
Reference in New Issue