#571 add getBytes to Page
parent
c3bdb20458
commit
2183ba9b61
|
@ -46,6 +46,8 @@ public class Page {
|
|||
|
||||
private boolean downloadSuccess = true;
|
||||
|
||||
private byte[] bytes;
|
||||
|
||||
private List<Request> targetRequests = new ArrayList<Request>();
|
||||
|
||||
public Page() {
|
||||
|
@ -228,6 +230,14 @@ public class Page {
|
|||
this.downloadSuccess = downloadSuccess;
|
||||
}
|
||||
|
||||
/**
 * Returns the byte array held in this page's {@code bytes} field.
 * The stored array reference is returned directly; no copy is made.
 *
 * @return the stored bytes, or {@code null} if the field was never assigned
 */
public byte[] getBytes() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
 * Stores the given byte array in this page's {@code bytes} field.
 * The array reference is kept as-is; no copy is made.
 *
 * @param bytes the bytes to store
 */
public void setBytes(byte[] bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Page{" +
|
||||
|
|
|
@ -45,6 +45,12 @@ public class Request implements Serializable {
|
|||
*/
|
||||
private long priority;
|
||||
|
||||
/**
|
||||
* When set to {@code true}, the downloader will not try to decode the response body into text.
|
||||
*
|
||||
*/
|
||||
private boolean binarayContent = false;
|
||||
|
||||
public Request() {
|
||||
}
|
||||
|
||||
|
@ -162,6 +168,14 @@ public class Request implements Serializable {
|
|||
this.requestBody = requestBody;
|
||||
}
|
||||
|
||||
/**
 * Returns the value of the {@code binarayContent} flag.
 * (The spelling "Binaray" is part of the existing public name.)
 *
 * @return the current flag value; the field is initialised to {@code false}
 */
public boolean isBinarayContent() {
|
||||
return binarayContent;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code binarayContent} flag.
 * (The spelling "Binaray" is part of the existing public name.)
 *
 * @param binarayContent the new flag value
 */
public void setBinarayContent(boolean binarayContent) {
|
||||
this.binarayContent = binarayContent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Request{" +
|
||||
|
|
|
@ -108,9 +108,13 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
}
|
||||
|
||||
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
|
||||
String content = getResponseContent(charset, httpResponse);
|
||||
byte[] bytes = IOUtils.toByteArray(httpResponse.getEntity().getContent());
|
||||
String contentType = httpResponse.getEntity().getContentType() == null ? "" : httpResponse.getEntity().getContentType().getValue();
|
||||
Page page = new Page();
|
||||
page.setRawText(content);
|
||||
page.setBytes(bytes);
|
||||
if (!request.isBinarayContent()){
|
||||
page.setRawText(getResponseContent(charset, contentType, bytes));
|
||||
}
|
||||
page.setUrl(new PlainText(request.getUrl()));
|
||||
page.setRequest(request);
|
||||
page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
|
||||
|
@ -121,22 +125,21 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
return page;
|
||||
}
|
||||
|
||||
private String getResponseContent(String charset, HttpResponse httpResponse) throws IOException {
|
||||
private String getResponseContent(String charset, String contentType, byte[] bytes) throws IOException {
|
||||
if (charset == null) {
|
||||
byte[] contentBytes = IOUtils.toByteArray(httpResponse.getEntity().getContent());
|
||||
String htmlCharset = getHtmlCharset(httpResponse, contentBytes);
|
||||
String htmlCharset = getHtmlCharset(contentType, bytes);
|
||||
if (htmlCharset != null) {
|
||||
return new String(contentBytes, htmlCharset);
|
||||
return new String(bytes, htmlCharset);
|
||||
} else {
|
||||
logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
|
||||
return new String(contentBytes);
|
||||
return new String(bytes);
|
||||
}
|
||||
} else {
|
||||
return IOUtils.toString(httpResponse.getEntity().getContent(), charset);
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
}
|
||||
|
||||
private String getHtmlCharset(HttpResponse httpResponse, byte[] contentBytes) throws IOException {
|
||||
return CharsetUtils.detectCharset(httpResponse.getEntity().getContentType() == null ? "" : httpResponse.getEntity().getContentType().getValue(), contentBytes);
|
||||
private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
|
||||
return CharsetUtils.detectCharset(contentType, contentBytes);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -271,4 +271,22 @@ public class HttpClientDownloaderTest {
|
|||
});
|
||||
}
|
||||
|
||||
/*
 * Downloads from a local stub server on port 13423 that responds with the
 * body "binary", using a Request whose binarayContent flag is set to true.
 * Asserts that the resulting Page has a null raw text and that its bytes
 * equal "binary".getBytes().
 */
@Test
|
||||
public void test_download_binary_content() throws Exception {
|
||||
HttpServer server = httpServer(13423);
|
||||
server.response("binary");
|
||||
Runner.running(server, new Runnable() {
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
|
||||
Request request = new Request();
|
||||
// Flag the request as binary before downloading.
|
||||
request.setBinarayContent(true);
|
||||
request.setUrl("http://127.0.0.1:13423/");
|
||||
Page page = httpClientDownloader.download(request, Site.me().toTask());
|
||||
// Raw text is expected to be unset for a binary request.
|
||||
assertThat(page.getRawText()).isNull();
|
||||
// "binary".getBytes() uses the platform default charset.
|
||||
assertThat(page.getBytes()).isEqualTo("binary".getBytes());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue