这个改动的原因基于两点:1)代理应该在执行完http请求后尽早归还给代理池;2)http代理应该是HttpClientDownloader该考虑的事,不应该由Spider来处理,因为Spider并不知道它的downloader是个HttpClientDownloader。
parent
5f106c9c69
commit
6b179c3d55
|
@ -325,10 +325,6 @@ public class Spider implements Runnable, Task {
|
||||||
onError(requestFinal);
|
onError(requestFinal);
|
||||||
logger.error("process request " + requestFinal + " error", e);
|
logger.error("process request " + requestFinal + " error", e);
|
||||||
} finally {
|
} finally {
|
||||||
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
|
|
||||||
site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal
|
|
||||||
.getExtra(Request.STATUS_CODE));
|
|
||||||
}
|
|
||||||
pageCount.incrementAndGet();
|
pageCount.incrementAndGet();
|
||||||
signalNewUrl();
|
signalNewUrl();
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,6 +109,10 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
return null;
|
return null;
|
||||||
} finally {
|
} finally {
|
||||||
request.putExtra(Request.STATUS_CODE, statusCode);
|
request.putExtra(Request.STATUS_CODE, statusCode);
|
||||||
|
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
|
||||||
|
site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
|
||||||
|
.getExtra(Request.STATUS_CODE));
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
if (httpResponse != null) {
|
if (httpResponse != null) {
|
||||||
//ensure the connection is released back to pool
|
//ensure the connection is released back to pool
|
||||||
|
|
Loading…
Reference in New Issue