Add null pointer checks to HttpClientDownloader
parent
6c61c5476d
commit
067f3ea0cb
|
@ -17,6 +17,8 @@ import us.codecraft.webmagic.selector.PlainText;
|
|||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -34,10 +36,23 @@ public class HttpClientDownloader implements Downloader {
|
|||
|
||||
@Override
|
||||
public Page download(Request request, Task task) {
|
||||
Site site = task.getSite();
|
||||
Site site = null;
|
||||
if (task != null) {
|
||||
site = task.getSite();
|
||||
}
|
||||
int retryTimes = 0;
|
||||
Set<Integer> acceptStatCode;
|
||||
String charset = null;
|
||||
if (site != null) {
|
||||
retryTimes = site.getRetryTimes();
|
||||
acceptStatCode = site.getAcceptStatCode();
|
||||
charset = site.getCharset();
|
||||
} else {
|
||||
acceptStatCode = new HashSet<Integer>();
|
||||
acceptStatCode.add(200);
|
||||
}
|
||||
logger.info("downloading page " + request.getUrl());
|
||||
HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site);
|
||||
String charset = site.getCharset();
|
||||
try {
|
||||
HttpGet httpGet = new HttpGet(request.getUrl());
|
||||
HttpResponse httpResponse = null;
|
||||
|
@ -49,7 +64,8 @@ public class HttpClientDownloader implements Downloader {
|
|||
retry = false;
|
||||
} catch (IOException e) {
|
||||
tried++;
|
||||
if (tried > site.getRetryTimes()) {
|
||||
|
||||
if (tried > retryTimes) {
|
||||
logger.warn("download page " + request.getUrl() + " error", e);
|
||||
return null;
|
||||
}
|
||||
|
@ -58,7 +74,7 @@ public class HttpClientDownloader implements Downloader {
|
|||
}
|
||||
} while (retry);
|
||||
int statusCode = httpResponse.getStatusLine().getStatusCode();
|
||||
if (site.getAcceptStatCode().contains(statusCode)) {
|
||||
if (acceptStatCode.contains(statusCode)) {
|
||||
//charset
|
||||
if (charset == null) {
|
||||
String value = httpResponse.getEntity().getContentType().getValue();
|
||||
|
@ -66,7 +82,7 @@ public class HttpClientDownloader implements Downloader {
|
|||
}
|
||||
//
|
||||
handleGzip(httpResponse);
|
||||
return handleResponse(request, charset, httpResponse,task);
|
||||
return handleResponse(request, charset, httpResponse, task);
|
||||
} else {
|
||||
logger.warn("code error " + statusCode + "\t" + request.getUrl());
|
||||
}
|
||||
|
@ -76,7 +92,7 @@ public class HttpClientDownloader implements Downloader {
|
|||
return null;
|
||||
}
|
||||
|
||||
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse,Task task) throws IOException {
|
||||
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
|
||||
String content = IOUtils.toString(httpResponse.getEntity().getContent(),
|
||||
charset);
|
||||
Page page = new Page();
|
||||
|
@ -88,7 +104,7 @@ public class HttpClientDownloader implements Downloader {
|
|||
|
||||
@Override
|
||||
public void setThread(int thread) {
|
||||
poolSize=thread;
|
||||
poolSize = thread;
|
||||
}
|
||||
|
||||
private void handleGzip(HttpResponse httpResponse) {
|
||||
|
|
|
@ -50,24 +50,30 @@ public class HttpClientPool {
|
|||
|
||||
private HttpClient generateClient(Site site) {
|
||||
HttpParams params = new BasicHttpParams();
|
||||
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
|
||||
if (site != null && site.getUserAgent() != null) {
|
||||
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
|
||||
}
|
||||
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000);
|
||||
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000);
|
||||
|
||||
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
|
||||
paramsBean.setVersion(HttpVersion.HTTP_1_1);
|
||||
paramsBean.setContentCharset(site.getCharset());
|
||||
if (site != null && site.getCharset() != null) {
|
||||
paramsBean.setContentCharset(site.getCharset());
|
||||
}
|
||||
paramsBean.setUseExpectContinue(false);
|
||||
|
||||
SchemeRegistry schemeRegistry = new SchemeRegistry();
|
||||
schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
|
||||
schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
|
||||
schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
|
||||
|
||||
PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry);
|
||||
connectionManager.setMaxTotal(poolSize);
|
||||
connectionManager.setDefaultMaxPerRoute(100);
|
||||
DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager, params);
|
||||
generateCookie(httpClient, site);
|
||||
if (site != null) {
|
||||
generateCookie(httpClient, site);
|
||||
}
|
||||
httpClient.getParams().setIntParameter("http.socket.timeout", 60000);
|
||||
httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
|
||||
return httpClient;
|
||||
|
@ -75,10 +81,12 @@ public class HttpClientPool {
|
|||
|
||||
private void generateCookie(DefaultHttpClient httpClient, Site site) {
|
||||
CookieStore cookieStore = new BasicCookieStore();
|
||||
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
|
||||
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
|
||||
cookie.setDomain(site.getDomain());
|
||||
cookieStore.addCookie(cookie);
|
||||
if (site.getCookies() != null) {
|
||||
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
|
||||
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
|
||||
cookie.setDomain(site.getDomain());
|
||||
cookieStore.addCookie(cookie);
|
||||
}
|
||||
}
|
||||
httpClient.setCookieStore(cookieStore);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue