Change EntityUtils to IOUtils because of an encoding error
parent
00b0a751b4
commit
807aefe9df
|
@ -1,6 +1,7 @@
|
|||
package us.codecraft.webmagic.downloader;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.annotation.ThreadSafe;
|
||||
import org.apache.http.client.config.CookieSpecs;
|
||||
|
@ -8,7 +9,6 @@ import org.apache.http.client.config.RequestConfig;
|
|||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.RequestBuilder;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
|
@ -158,7 +158,7 @@ public class HttpClientDownloader implements Downloader {
|
|||
}
|
||||
|
||||
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
|
||||
String content = EntityUtils.toString(httpResponse.getEntity(), charset);
|
||||
String content = IOUtils.toString(httpResponse.getEntity().getContent(), charset);
|
||||
Page page = new Page();
|
||||
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
||||
page.setUrl(new PlainText(request.getUrl()));
|
||||
|
|
|
@ -16,7 +16,7 @@ import java.util.List;
|
|||
public class BaiduBaikePageProcesser implements PageProcessor {
|
||||
|
||||
private Site site = Site.me()//.setHttpProxy(new HttpHost("127.0.0.1",8888))
|
||||
.setCharset("utf-8").setRetryTimes(3).setSleepTime(1000).setUseGzip(true);
|
||||
.setRetryTimes(3).setSleepTime(1000).setUseGzip(true);
|
||||
|
||||
@Override
|
||||
public void process(Page page) {
|
||||
|
|
|
@ -4,7 +4,6 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.model.AfterExtractor;
|
||||
import us.codecraft.webmagic.model.OOSpider;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
import us.codecraft.webmagic.model.annotation.UrlTemplate;
|
||||
import us.codecraft.webmagic.model.direct.Param;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -12,10 +11,8 @@ import java.util.List;
|
|||
|
||||
/**
|
||||
* @since 0.4.0
|
||||
* NO implement yet!!!!!!!!!!!!
|
||||
* @author code4crafter@gmail.com
|
||||
*/
|
||||
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
|
||||
public class BaiduBaike implements AfterExtractor{
|
||||
|
||||
private String word;
|
||||
|
|
Loading…
Reference in New Issue