more error log on page code error #601

master
yihua.huang 2017-06-17 10:36:41 +08:00
parent 6bd1eed25e
commit 4111b07263
2 changed files with 3 additions and 2 deletions

View File

@@ -418,6 +418,8 @@ public class Spider implements Runnable, Task {
pipeline.process(page.getResultItems(), this); pipeline.process(page.getResultItems(), this);
} }
} }
} else {
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
} }
sleep(site.getSleepTime()); sleep(site.getSleepTime());
return; return;

View File

@@ -76,7 +76,6 @@ public class HttpClientDownloader extends AbstractDownloader {
if (task == null || task.getSite() == null) { if (task == null || task.getSite() == null) {
throw new NullPointerException("task or site can not be null"); throw new NullPointerException("task or site can not be null");
} }
logger.debug("downloading page {}", request.getUrl());
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
CloseableHttpClient httpClient = getHttpClient(task.getSite()); CloseableHttpClient httpClient = getHttpClient(task.getSite());
Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null;
@@ -86,7 +85,7 @@ public class HttpClientDownloader extends AbstractDownloader {
httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
page = handleResponse(request, task.getSite().getCharset(), httpResponse, task); page = handleResponse(request, task.getSite().getCharset(), httpResponse, task);
onSuccess(request); onSuccess(request);
logger.debug("downloading page success {}", page); logger.info("downloading page success {}", request.getUrl());
return page; return page;
} catch (IOException e) { } catch (IOException e) {
logger.warn("download page {} error", request.getUrl(), e); logger.warn("download page {} error", request.getUrl(), e);