#30 reuse PoolingClientConnectionManager for HttpClientDownloader
parent
5a226387e0
commit
7fb44d2eec
|
@ -34,6 +34,8 @@ public class HttpClientDownloader implements Downloader {
|
||||||
|
|
||||||
private Logger logger = Logger.getLogger(getClass());
|
private Logger logger = Logger.getLogger(getClass());
|
||||||
|
|
||||||
|
private HttpClientPool httpClientPool;
|
||||||
|
|
||||||
private int poolSize = 1;
|
private int poolSize = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -58,6 +60,13 @@ public class HttpClientDownloader implements Downloader {
|
||||||
return (Html) page.getHtml();
|
return (Html) page.getHtml();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private HttpClientPool getHttpClientPool(){
|
||||||
|
if (httpClientPool==null){
|
||||||
|
httpClientPool = new HttpClientPool(poolSize);
|
||||||
|
}
|
||||||
|
return httpClientPool;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Page download(Request request, Task task) {
|
public Page download(Request request, Task task) {
|
||||||
Site site = null;
|
Site site = null;
|
||||||
|
@ -78,7 +87,7 @@ public class HttpClientDownloader implements Downloader {
|
||||||
acceptStatCode.add(200);
|
acceptStatCode.add(200);
|
||||||
}
|
}
|
||||||
logger.info("downloading page " + request.getUrl());
|
logger.info("downloading page " + request.getUrl());
|
||||||
HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site);
|
HttpClient httpClient = getHttpClientPool().getClient(site);
|
||||||
try {
|
try {
|
||||||
HttpGet httpGet = new HttpGet(request.getUrl());
|
HttpGet httpGet = new HttpGet(request.getUrl());
|
||||||
if (headers!=null){
|
if (headers!=null){
|
||||||
|
@ -150,6 +159,7 @@ public class HttpClientDownloader implements Downloader {
|
||||||
@Override
|
@Override
|
||||||
public void setThread(int thread) {
|
public void setThread(int thread) {
|
||||||
poolSize = thread;
|
poolSize = thread;
|
||||||
|
httpClientPool = new HttpClientPool(thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleGzip(HttpResponse httpResponse) {
|
private void handleGzip(HttpResponse httpResponse) {
|
||||||
|
|
|
@ -24,23 +24,19 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class HttpClientPool {
|
public class HttpClientPool {
|
||||||
|
|
||||||
public static volatile HttpClientPool INSTANCE;
|
|
||||||
|
|
||||||
public static HttpClientPool getInstance(int poolSize) {
|
|
||||||
if (INSTANCE == null) {
|
|
||||||
synchronized (HttpClientPool.class) {
|
|
||||||
if (INSTANCE == null) {
|
|
||||||
INSTANCE = new HttpClientPool(poolSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return INSTANCE;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int poolSize;
|
private int poolSize;
|
||||||
|
|
||||||
private HttpClientPool(int poolSize) {
|
private PoolingClientConnectionManager connectionManager;
|
||||||
|
|
||||||
|
public HttpClientPool(int poolSize) {
|
||||||
this.poolSize = poolSize;
|
this.poolSize = poolSize;
|
||||||
|
SchemeRegistry schemeRegistry = new SchemeRegistry();
|
||||||
|
schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
|
||||||
|
schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
|
||||||
|
|
||||||
|
connectionManager = new PoolingClientConnectionManager(schemeRegistry);
|
||||||
|
connectionManager.setMaxTotal(poolSize);
|
||||||
|
connectionManager.setDefaultMaxPerRoute(100);
|
||||||
}
|
}
|
||||||
|
|
||||||
public HttpClient getClient(Site site) {
|
public HttpClient getClient(Site site) {
|
||||||
|
@ -58,7 +54,6 @@ public class HttpClientPool {
|
||||||
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 3000);
|
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
params.setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
|
params.setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
|
||||||
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
||||||
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
||||||
|
@ -67,13 +62,6 @@ public class HttpClientPool {
|
||||||
}
|
}
|
||||||
paramsBean.setUseExpectContinue(false);
|
paramsBean.setUseExpectContinue(false);
|
||||||
|
|
||||||
SchemeRegistry schemeRegistry = new SchemeRegistry();
|
|
||||||
schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
|
|
||||||
schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
|
|
||||||
|
|
||||||
PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry);
|
|
||||||
connectionManager.setMaxTotal(poolSize);
|
|
||||||
connectionManager.setDefaultMaxPerRoute(100);
|
|
||||||
DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager, params);
|
DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager, params);
|
||||||
if (site != null) {
|
if (site != null) {
|
||||||
generateCookie(httpClient, site);
|
generateCookie(httpClient, site);
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.3.2</version>
|
<version>0.3.3-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue