#613 add charset to page
parent
65049baca4
commit
32f1f2cf44
|
@ -113,7 +113,11 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
Page page = new Page();
|
Page page = new Page();
|
||||||
page.setBytes(bytes);
|
page.setBytes(bytes);
|
||||||
if (!request.isBinaryContent()){
|
if (!request.isBinaryContent()){
|
||||||
page.setRawText(getResponseContent(charset, contentType, bytes));
|
if (charset == null) {
|
||||||
|
charset = getHtmlCharset(contentType, bytes);
|
||||||
|
}
|
||||||
|
page.setCharset(charset);
|
||||||
|
page.setRawText(new String(bytes, charset));
|
||||||
}
|
}
|
||||||
page.setUrl(new PlainText(request.getUrl()));
|
page.setUrl(new PlainText(request.getUrl()));
|
||||||
page.setRequest(request);
|
page.setRequest(request);
|
||||||
|
@ -125,21 +129,12 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getResponseContent(String charset, String contentType, byte[] bytes) throws IOException {
|
|
||||||
if (charset == null) {
|
|
||||||
String htmlCharset = getHtmlCharset(contentType, bytes);
|
|
||||||
if (htmlCharset != null) {
|
|
||||||
return new String(bytes, htmlCharset);
|
|
||||||
} else {
|
|
||||||
logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
|
|
||||||
return new String(bytes);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return new String(bytes, charset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
|
private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
|
||||||
return CharsetUtils.detectCharset(contentType, contentBytes);
|
String charset = CharsetUtils.detectCharset(contentType, contentBytes);
|
||||||
|
if (charset == null) {
|
||||||
|
charset = Charset.defaultCharset().name();
|
||||||
|
logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
|
||||||
|
}
|
||||||
|
return charset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue