diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index b26bcf9..e14eff5 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -1,28 +1,10 @@ package us.codecraft.webmagic.downloader; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.IOUtils; -import org.apache.http.Header; -import org.apache.http.HttpHost; import org.apache.http.HttpResponse; import org.apache.http.annotation.ThreadSafe; -import org.apache.http.auth.AuthState; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CookieStore; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.client.methods.RequestBuilder; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.cookie.Cookie; -import org.apache.http.impl.auth.BasicScheme; -import org.apache.http.impl.client.BasicCookieStore; import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.message.BasicNameValuePair; -import org.apache.http.protocol.BasicHttpContext; -import org.apache.http.protocol.HttpContext; import org.apache.http.util.EntityUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,16 +12,15 @@ import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; -import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.ProxyProvider; import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.HttpClientUtils; -import us.codecraft.webmagic.utils.HttpConstant; import java.io.IOException; import java.nio.charset.Charset; -import java.util.*; +import java.util.HashMap; +import java.util.Map; /** @@ -96,33 +77,12 @@ public class HttpClientDownloader extends AbstractDownloader { } logger.debug("downloading page {}", request.getUrl()); CloseableHttpResponse httpResponse = null; - int statusCode = 0; Site site = task.getSite(); - Proxy proxy = null; - HttpClientContext httpContext = new HttpClientContext(); - if (proxyProvider != null) { - proxy = proxyProvider.getProxy(task); - AuthState authState = new AuthState(); - authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword())); - httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState); - } CloseableHttpClient httpClient = getHttpClient(site); - HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy); - if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) { - CookieStore cookieStore = new BasicCookieStore(); - for (Cookie c : request.getCookies()) { - cookieStore.addCookie(c); - } - httpContext.setCookieStore(cookieStore); - } - if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) { - for (Header h : request.getHeaders()) { - httpUriRequest.setHeader(h); - } - } + HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, site, proxyProvider != null ? proxyProvider.getProxy(task) : null); try { - httpResponse = httpClient.execute(httpUriRequest, httpContext); - statusCode = httpResponse.getStatusLine().getStatusCode(); + httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); + int statusCode = httpResponse.getStatusLine().getStatusCode(); if (site.getAcceptStatCode().contains(statusCode)) { Page page = handleResponse(request, site.getCharset(), httpResponse, task); onSuccess(request); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientRequestContext.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientRequestContext.java new file mode 100644 index 0000000..b0afc65 --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientRequestContext.java @@ -0,0 +1,33 @@ +package us.codecraft.webmagic.downloader; + +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.client.protocol.HttpClientContext; + +/** + * @author code4crafter@gmail.com + * Date: 17/4/8 + * Time: 19:43 + */ +public class HttpClientRequestContext { + + private HttpUriRequest httpUriRequest; + + private HttpClientContext httpClientContext; + + public HttpUriRequest getHttpUriRequest() { + return httpUriRequest; + } + + public void setHttpUriRequest(HttpUriRequest httpUriRequest) { + this.httpUriRequest = httpUriRequest; + } + + public HttpClientContext getHttpClientContext() { + return httpClientContext; + } + + public void setHttpClientContext(HttpClientContext httpClientContext) { + this.httpClientContext = httpClientContext; + } + +} diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java index 22aa31d..acf1a7c 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java @@ -1,11 +1,20 @@ package us.codecraft.webmagic.downloader; +import org.apache.commons.collections.CollectionUtils; +import org.apache.http.Header; import org.apache.http.HttpHost; +import org.apache.http.auth.AuthState; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CookieStore; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.RequestBuilder; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.cookie.Cookie; import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicCookieStore; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.proxy.Proxy; @@ -20,7 +29,29 @@ import java.util.Map; */ public class HttpUriRequestConverter { - public HttpUriRequest convert(Request request, Site site, Proxy proxy) { + public HttpClientRequestContext convert(Request request, Site site, Proxy proxy) { + HttpClientRequestContext httpClientRequestContext = new HttpClientRequestContext(); + httpClientRequestContext.setHttpUriRequest(convertHttpUriRequest(request, site, proxy)); + httpClientRequestContext.setHttpClientContext(convertHttpClientContext(request, site, proxy)); + return httpClientRequestContext; + } + + private HttpClientContext convertHttpClientContext(Request request, Site site, Proxy proxy) { + HttpClientContext httpContext = new HttpClientContext(); + AuthState authState = new AuthState(); + authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword())); + httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState); + if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) { + CookieStore cookieStore = new BasicCookieStore(); + for (Cookie c : request.getCookies()) { + cookieStore.addCookie(c); + } + httpContext.setCookieStore(cookieStore); + } + return httpContext; + } + + private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) { RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl()); if (site.getHeaders() != null) { for (Map.Entry headerEntry : site.getHeaders().entrySet()) { @@ -40,7 +71,13 @@ public class HttpUriRequestConverter { requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort())); } requestBuilder.setConfig(requestConfigBuilder.build()); - return requestBuilder.build(); + HttpUriRequest httpUriRequest = requestBuilder.build(); + if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) { + for (Header h : request.getHeaders()) { + httpUriRequest.setHeader(h); + } + } + return httpUriRequest; } private RequestBuilder selectRequestMethod(Request request) {