diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index c4a0c01..5c51916 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -1,6 +1,7 @@ package us.codecraft.webmagic.downloader; import com.google.common.collect.Sets; +import org.apache.commons.io.IOUtils; import org.apache.http.HttpResponse; import org.apache.http.annotation.ThreadSafe; import org.apache.http.client.config.CookieSpecs; @@ -8,7 +9,6 @@ import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.RequestBuilder; import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.util.EntityUtils; import org.apache.log4j.Logger; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; @@ -158,7 +158,7 @@ public class HttpClientDownloader implements Downloader { } protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException { - String content = EntityUtils.toString(httpResponse.getEntity(), charset); + String content = IOUtils.toString(httpResponse.getEntity().getContent(), charset); Page page = new Page(); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcesser.java b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcesser.java index 071b7e6..af03166 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcesser.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcesser.java @@ -16,7 +16,7 @@ import java.util.List; public class BaiduBaikePageProcesser implements PageProcessor { private Site site = Site.me()//.setHttpProxy(new HttpHost("127.0.0.1",8888)) - .setCharset("utf-8").setRetryTimes(3).setSleepTime(1000).setUseGzip(true); + .setRetryTimes(3).setSleepTime(1000).setUseGzip(true); @Override public void process(Page page) { diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java index edd167d..6f901ff 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java @@ -4,7 +4,6 @@ import us.codecraft.webmagic.Page; import us.codecraft.webmagic.model.AfterExtractor; import us.codecraft.webmagic.model.OOSpider; import us.codecraft.webmagic.model.annotation.ExtractBy; -import us.codecraft.webmagic.model.annotation.UrlTemplate; import us.codecraft.webmagic.model.direct.Param; import java.util.ArrayList; @@ -12,10 +11,8 @@ import java.util.List; /** * @since 0.4.0 - * NO implement yet!!!!!!!!!!!! * @author code4crafter@gmail.com */ -@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8") public class BaiduBaike implements AfterExtractor{ private String word;