这个改动的原因基于两点:1)代理应该在执行完http请求后尽早归还给代理池;2)http代理应该是HttpClientDownloader该考虑的事,不应该由Spider来处理,Spider并不知道它的downloader是个HttpClientDownloader
parent
5f106c9c69
commit
6b179c3d55
|
@ -325,10 +325,6 @@ public class Spider implements Runnable, Task {
|
|||
onError(requestFinal);
|
||||
logger.error("process request " + requestFinal + " error", e);
|
||||
} finally {
|
||||
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
|
||||
site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal
|
||||
.getExtra(Request.STATUS_CODE));
|
||||
}
|
||||
pageCount.incrementAndGet();
|
||||
signalNewUrl();
|
||||
}
|
||||
|
|
|
@ -109,6 +109,10 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
return null;
|
||||
} finally {
|
||||
request.putExtra(Request.STATUS_CODE, statusCode);
|
||||
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
|
||||
site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
|
||||
.getExtra(Request.STATUS_CODE));
|
||||
}
|
||||
try {
|
||||
if (httpResponse != null) {
|
||||
//ensure the connection is released back to pool
|
||||
|
|
Loading…
Reference in New Issue