From 636359300f6d5b003c64aa4c1685c655da8b584c Mon Sep 17 00:00:00 2001
From: "yihua.huang"
Date: Mon, 29 May 2017 08:29:53 +0800
Subject: [PATCH] add Site.disableCookieManagement #577

---
 .../main/java/us/codecraft/webmagic/Site.java | 17 +++++++++++++++++
 .../downloader/HttpClientDownloader.java      |  2 +-
 .../downloader/HttpClientGenerator.java       |  4 ++++
 .../downloader/HttpClientDownloaderTest.java  | 17 +++++++++++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index 13fa4c1..8aadb3e 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -41,6 +41,8 @@ public class Site {
 
     private boolean useGzip = true;
 
+    private boolean disableCookieManagement = false;
+
     static {
         DEFAULT_STATUS_CODE_SET.add(HttpConstant.StatusCode.CODE_200);
     }
@@ -309,6 +311,21 @@ public class Site {
         return this;
     }
 
+    public boolean isDisableCookieManagement() {
+        return disableCookieManagement;
+    }
+
+    /**
+     * The downloader is supposed to store response cookies.
+     * Set this to true to ignore all cookie fields and stay clean.
+     * Warning: cookies set on the Site or Request will NOT be sent either if disableCookieManagement is true.
+     * @param disableCookieManagement true to disable cookie management
+     */
+    public Site setDisableCookieManagement(boolean disableCookieManagement) {
+        this.disableCookieManagement = disableCookieManagement;
+        return this;
+    }
+
     public Task toTask() {
         return new Task() {
             @Override
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 898adb7..7119d10 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -138,6 +138,6 @@ public class HttpClientDownloader extends AbstractDownloader {
     }
 
     private String getHtmlCharset(HttpResponse httpResponse, byte[] contentBytes) throws IOException {
-        return CharsetUtils.detectCharset(httpResponse.getEntity().getContentType().getValue(), contentBytes);
+        return CharsetUtils.detectCharset(httpResponse.getEntity().getContentType() == null ? "" : httpResponse.getEntity().getContentType().getValue(), contentBytes);
     }
 }
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
index 9e17f60..562f36f 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
@@ -127,6 +127,10 @@ public class HttpClientGenerator {
     }
 
     private void generateCookie(HttpClientBuilder httpClientBuilder, Site site) {
+        if (site.isDisableCookieManagement()) {
+            httpClientBuilder.disableCookieManagement();
+            return;
+        }
         CookieStore cookieStore = new BasicCookieStore();
         for (Map.Entry cookieEntry : site.getCookies().entrySet()) {
             BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
index 685bdff..8e36c05 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
@@ -172,6 +172,23 @@ public class HttpClientDownloaderTest {
         });
     }
 
+    @Test
+    public void test_disableCookieManagement() throws Exception {
+        HttpServer server = httpServer(13423);
+        server.get(not(eq(cookie("cookie"), "cookie-webmagic"))).response("ok");
+        Runner.running(server, new Runnable() {
+            @Override
+            public void run() throws Exception {
+                HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
+                Request request = new Request();
+                request.setUrl("http://127.0.0.1:13423");
+                request.addCookie("cookie","cookie-webmagic");
+                Page page = httpClientDownloader.download(request, Site.me().setDisableCookieManagement(true).toTask());
+                assertThat(page.getRawText()).isEqualTo("ok");
+            }
+        });
+    }
+
     @Test
     public void test_set_request_header() throws Exception {
         HttpServer server = httpServer(13423);