这个改动的原因基于两点:1)代理应该在执行完http请求后尽早归还给代理池;2)http代理是HttpClientDownloader该考虑的事,不应该由Spider来处理,因为Spider并不知道它的downloader是个HttpClientDownloader。

master
zhangheng09 2016-03-12 20:09:41 +08:00
parent 5f106c9c69
commit 6b179c3d55
2 changed files with 4 additions and 4 deletions

View File

@@ -325,10 +325,6 @@ public class Spider implements Runnable, Task {
onError(requestFinal);
logger.error("process request " + requestFinal + " error", e);
} finally {
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal
.getExtra(Request.STATUS_CODE));
}
pageCount.incrementAndGet();
signalNewUrl();
}

View File

@@ -109,6 +109,10 @@ public class HttpClientDownloader extends AbstractDownloader {
return null;
} finally {
request.putExtra(Request.STATUS_CODE, statusCode);
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
.getExtra(Request.STATUS_CODE));
}
try {
if (httpResponse != null) {
//ensure the connection is released back to pool