From 160a149b0590da553b96bddabfe023b5ddf8941d Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sun, 3 Nov 2013 23:10:09 +0800 Subject: [PATCH] todo bugfix --- .../src/main/java/us/codecraft/webmagic/Spider.java | 2 +- .../codecraft/webmagic/downloader/HttpClientDownloader.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 32bd3e1..04ac894 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -482,7 +482,7 @@ public class Spider implements Runnable, Task { /** * Exit when complete.
* True: exit when all url of the site is downloaded.
- * False: not exit until call stop manually.
+ * False: do not exit until stop() is called manually.
* * @param exitWhenComplete * @return diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 1bee564..d6ee8c1 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -90,11 +90,15 @@ public class HttpClientDownloader implements Downloader { HttpClient httpClient = getHttpClientPool().getClient(site); try { HttpGet httpGet = new HttpGet(request.getUrl()); + if (headers!=null){ for (Map.Entry headerEntry : headers.entrySet()) { httpGet.addHeader(headerEntry.getKey(),headerEntry.getValue()); } } + if (!httpGet.containsHeader("Accept-Encoding")) { + httpGet.addHeader("Accept-Encoding", "gzip"); + } HttpResponse httpResponse = null; int tried = 0; boolean retry; @@ -168,6 +172,7 @@ public class HttpClientDownloader implements Downloader { HeaderElement[] codecs = ceheader.getElements(); for (HeaderElement codec : codecs) { if (codec.getName().equalsIgnoreCase("gzip")) { + //todo bugfix httpResponse.setEntity( new GzipDecompressingEntity(httpResponse.getEntity())); }