#33 ignore 'content-encoding' when redirecting
parent
8f774afc84
commit
00b0a751b4
|
@ -1,9 +1,8 @@
|
||||||
package us.codecraft.webmagic.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import org.apache.http.HttpException;
|
import org.apache.http.*;
|
||||||
import org.apache.http.HttpRequest;
|
|
||||||
import org.apache.http.HttpRequestInterceptor;
|
|
||||||
import org.apache.http.client.CookieStore;
|
import org.apache.http.client.CookieStore;
|
||||||
|
import org.apache.http.client.protocol.ResponseContentEncoding;
|
||||||
import org.apache.http.config.Registry;
|
import org.apache.http.config.Registry;
|
||||||
import org.apache.http.config.RegistryBuilder;
|
import org.apache.http.config.RegistryBuilder;
|
||||||
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
||||||
|
@ -60,31 +59,24 @@ public class HttpClientGenerator {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
// httpClientBuilder.disableContentCompression().addInterceptorFirst(new HttpResponseInterceptor() {
|
// HttpClient has a problem handling compressed entities on redirect responses
|
||||||
//
|
// So automatic content decompression is disabled here and the decoding is done manually
|
||||||
// public void process(
|
// https://issues.apache.org/jira/browse/HTTPCLIENT-1432
|
||||||
// final HttpResponse response,
|
httpClientBuilder.disableContentCompression();
|
||||||
// final HttpContext context) throws HttpException, IOException {
|
httpClientBuilder.addInterceptorFirst(new HttpResponseInterceptor() {
|
||||||
// if (response.getStatusLine().getStatusCode() != 200) {
|
|
||||||
// return;
|
private ResponseContentEncoding contentEncoding = new ResponseContentEncoding();
|
||||||
// }
|
|
||||||
// HttpEntity entity = response.getEntity();
|
public void process(
|
||||||
// if (entity != null) {
|
final HttpResponse response,
|
||||||
// Header ceheader = entity.getContentEncoding();
|
final HttpContext context) throws HttpException, IOException {
|
||||||
// if (ceheader != null) {
|
if (response.getStatusLine().getStatusCode() == 301 || response.getStatusLine().getStatusCode() == 302) {
|
||||||
// HeaderElement[] codecs = ceheader.getElements();
|
return;
|
||||||
// for (int i = 0; i < codecs.length; i++) {
|
}
|
||||||
// if (codecs[i].getName().equalsIgnoreCase("gzip")) {
|
contentEncoding.process(response, context);
|
||||||
// response.setEntity(
|
}
|
||||||
// new GzipDecompressingEntity(response.getEntity()));
|
|
||||||
// return;
|
});
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// });
|
|
||||||
if (site != null) {
|
if (site != null) {
|
||||||
httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
|
httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,6 @@ public class BaiduBaikePageProcesser implements PageProcessor {
|
||||||
list.add(String.format(urlTemplate,"风力发电"));
|
list.add(String.format(urlTemplate,"风力发电"));
|
||||||
list.add(String.format(urlTemplate,"太阳能"));
|
list.add(String.format(urlTemplate,"太阳能"));
|
||||||
list.add(String.format(urlTemplate,"地热发电"));
|
list.add(String.format(urlTemplate,"地热发电"));
|
||||||
list.add(String.format(urlTemplate,"众数"));
|
|
||||||
list.add(String.format(urlTemplate,"地热发电"));
|
list.add(String.format(urlTemplate,"地热发电"));
|
||||||
List<ResultItems> resultItemses = spider.getAll(list);
|
List<ResultItems> resultItemses = spider.getAll(list);
|
||||||
for (ResultItems resultItemse : resultItemses) {
|
for (ResultItems resultItemse : resultItemses) {
|
||||||
|
|
Loading…
Reference in New Issue