From 79522f941ec930700613d2430c9af9a5aed42f7f Mon Sep 17 00:00:00 2001
From: wuyifan <84390524@qq.com>
Date: Fri, 17 Mar 2017 14:10:54 +0800
Subject: [PATCH] Bug, add null check to site in HttpClientDownloader & HttpClientGenerator

---
 .../downloader/HttpClientDownloader.java | 28 +++++++++++--------
 .../downloader/HttpClientGenerator.java  | 15 ++++++----
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 9e77ef5..952a750 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -82,14 +82,14 @@ public class HttpClientDownloader extends AbstractDownloader {
         }
         logger.info("downloading page {}", request.getUrl());
         CloseableHttpResponse httpResponse = null;
-        int statusCode=0;
+        int statusCode = 0;
         try {
             HttpHost proxyHost = null;
             Proxy proxy = null; //TODO
-            if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
+            if (site != null && site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
                 proxy = site.getHttpProxyFromPool();
                 proxyHost = proxy.getHttpHost();
-            } else if(site.getHttpProxy()!= null){
+            } else if (site != null && site.getHttpProxy() != null){
                 proxyHost = site.getHttpProxy();
             }
 
@@ -107,14 +107,14 @@ public class HttpClientDownloader extends AbstractDownloader {
             }
         } catch (IOException e) {
             logger.warn("download page {} error", request.getUrl(), e);
-            if (site.getCycleRetryTimes() > 0) {
+            if (site != null && site.getCycleRetryTimes() > 0) {
                 return addToCycleRetry(request, site);
             }
             onError(request);
             return null;
         } finally {
             request.putExtra(Request.STATUS_CODE, statusCode);
-            if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
+            if (site != null && site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
                 site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
                         .getExtra(Request.STATUS_CODE));
             }
@@ -138,19 +138,23 @@ public class HttpClientDownloader extends AbstractDownloader {
         return acceptStatCode.contains(statusCode);
     }
 
-    protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers,HttpHost proxy) {
+    protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers, HttpHost proxy) {
         RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
         if (headers != null) {
             for (Map.Entry<String, String> headerEntry : headers.entrySet()) {
                 requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
             }
         }
-        RequestConfig.Builder requestConfigBuilder = RequestConfig.custom()
-                .setConnectionRequestTimeout(site.getTimeOut())
-                .setSocketTimeout(site.getTimeOut())
-                .setConnectTimeout(site.getTimeOut())
-                .setCookieSpec(CookieSpecs.BEST_MATCH);
-        if (proxy !=null) {
+
+        RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
+        if (site != null) {
+            requestConfigBuilder.setConnectionRequestTimeout(site.getTimeOut())
+                    .setSocketTimeout(site.getTimeOut())
+                    .setConnectTimeout(site.getTimeOut())
+                    .setCookieSpec(CookieSpecs.BEST_MATCH);
+        }
+
+        if (proxy != null) {
             requestConfigBuilder.setProxy(proxy);
             request.putExtra(Request.PROXY, proxy);
         }
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
index 1a0b2bd..aec5309 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
@@ -100,7 +100,7 @@ public class HttpClientGenerator {
         CredentialsProvider credsProvider = null;
         HttpClientBuilder httpClientBuilder = HttpClients.custom();
 
-        if(proxy!=null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword()))
+        if (proxy != null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword()))
         {
             credsProvider= new BasicCredentialsProvider();
             credsProvider.setCredentials(
@@ -109,7 +109,7 @@ public class HttpClientGenerator {
             httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
         }
 
-        if(site!=null&&site.getHttpProxy()!=null&&site.getUsernamePasswordCredentials()!=null){
+        if (site != null && site.getHttpProxy()!= null && site.getUsernamePasswordCredentials() != null){
             credsProvider = new BasicCredentialsProvider();
             credsProvider.setCredentials(
                     new AuthScope(site.getHttpProxy()),//可以访问的范围
@@ -137,14 +137,19 @@ public class HttpClientGenerator {
         }
         //解决post/redirect/post 302跳转问题
         httpClientBuilder.setRedirectStrategy(new CustomRedirectStrategy());
-
-        SocketConfig socketConfig = SocketConfig.custom().setSoTimeout(site.getTimeOut()).setSoKeepAlive(true).setTcpNoDelay(true).build();
+
+        SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
+        socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
+        if (site != null) {
+            socketConfigBuilder.setSoTimeout(site.getTimeOut());
+        }
+        SocketConfig socketConfig = socketConfigBuilder.build();
         httpClientBuilder.setDefaultSocketConfig(socketConfig);
         connectionManager.setDefaultSocketConfig(socketConfig);
         if (site != null) {
             httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
+            generateCookie(httpClientBuilder, site);
         }
-        generateCookie(httpClientBuilder, site);
         return httpClientBuilder.build();
     }
 