diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 32bd3e1..04ac894 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -482,7 +482,7 @@ public class Spider implements Runnable, Task { /** * Exit when complete.
* True: exit when all url of the site is downloaded.
- * False: not exit until call stop manually.
+ * False: do not exit until stop() is called manually.
* * @param exitWhenComplete * @return diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 1bee564..d6ee8c1 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -90,11 +90,15 @@ public class HttpClientDownloader implements Downloader { HttpClient httpClient = getHttpClientPool().getClient(site); try { HttpGet httpGet = new HttpGet(request.getUrl()); + if (headers!=null){ for (Map.Entry headerEntry : headers.entrySet()) { httpGet.addHeader(headerEntry.getKey(),headerEntry.getValue()); } } + if (!httpGet.containsHeader("Accept-Encoding")) { + httpGet.addHeader("Accept-Encoding", "gzip"); + } HttpResponse httpResponse = null; int tried = 0; boolean retry; @@ -168,6 +172,7 @@ public class HttpClientDownloader implements Downloader { HeaderElement[] codecs = ceheader.getElements(); for (HeaderElement codec : codecs) { if (codec.getName().equalsIgnoreCase("gzip")) { + //todo bugfix httpResponse.setEntity( new GzipDecompressingEntity(httpResponse.getEntity())); }