From 5f106c9c69abfc8a264be9cad3f25b714d6823af Mon Sep 17 00:00:00 2001 From: zhangheng09 Date: Sat, 12 Mar 2016 20:03:27 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=BD=93page=E4=B8=BAnull=E6=97=B6?= =?UTF-8?q?=EF=BC=8C=E6=84=8F=E5=91=B3=E7=9D=80=E9=9D=9E=E6=AD=A3=E5=B8=B8?= =?UTF-8?q?=E7=9A=84=E5=93=8D=E5=BA=94=E7=8A=B6=E6=80=81=EF=BC=8C=E5=BA=94?= =?UTF-8?q?=E8=AF=A5=E6=8A=9B=E5=87=BA=E5=BC=82=E5=B8=B8=EF=BC=8C=E5=90=A6?= =?UTF-8?q?=E5=88=99SpiderListener=E7=9A=84onSuccess=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E5=92=8ConError=E6=96=B9=E6=B3=95=E9=83=BD=E4=BC=9A=E6=89=A7?= =?UTF-8?q?=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 0fd770d..d62130b 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -408,9 +408,7 @@ public class Spider implements Runnable, Task { protected void processRequest(Request request) { Page page = downloader.download(request, this); if (page == null) { - sleep(site.getRetrySleepTime()); - onError(request); - return; + throw new RuntimeException("unaccpetable response status"); } // for cycle retry if (page.isNeedCycleRetry()) { From 6b179c3d55d6e72676723ccdf0b7fa11870a7081 Mon Sep 17 00:00:00 2001 From: zhangheng09 Date: Sat, 12 Mar 2016 20:09:41 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E8=BF=99=E4=B8=AA=E6=94=B9=E5=8A=A8?= =?UTF-8?q?=E7=9A=84=E5=8E=9F=E5=9B=A0=E5=9F=BA=E4=BA=8E=E4=B8=A4=E7=82=B9?= =?UTF-8?q?=EF=BC=9A1=EF=BC=89=E4=BB=A3=E7=90=86=E5=BD=92=E8=BF=98?= =?UTF-8?q?=E7=BB=99=E4=BB=A3=E7=90=86=E6=B1=A0=E7=9A=84=E6=97=B6=E6=9C=BA?= =?UTF-8?q?=E5=BA=94=E8=AF=A5=E6=98=AF=E6=89=A7=E8=A1=8C=E5=AE=8Chttp?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=E5=90=8E=E5=B0=B1=E8=A6=81=E5=B0=BD=E6=97=A9?= =?UTF-8?q?=E5=BD=92=E8=BF=98=202)http=E4=BB=A3=E7=90=86=E5=BA=94=E8=AF=A5?= =?UTF-8?q?=E6=98=AFHttpClientDownloader=E8=AF=A5=E8=80=83=E8=99=91?= =?UTF-8?q?=E7=9A=84=E4=BA=8B=EF=BC=8C=E4=B8=8D=E5=BA=94=E8=AF=A5=E6=9C=89?= =?UTF-8?q?Spider=E6=9D=A5=E5=A4=84=E7=90=86=EF=BC=8CSpider=E5=B9=B6?= =?UTF-8?q?=E4=B8=8D=E7=9F=A5=E9=81=93=E5=AE=83=E7=9A=84downloader?= =?UTF-8?q?=E6=98=AF=E4=B8=AAHttpClientDownloader?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java | 4 ---- .../codecraft/webmagic/downloader/HttpClientDownloader.java | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index d62130b..a2be633 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -325,10 +325,6 @@ public class Spider implements Runnable, Task { onError(requestFinal); logger.error("process request " + requestFinal + " error", e); } finally { - if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) { - site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal - .getExtra(Request.STATUS_CODE)); - } pageCount.incrementAndGet(); signalNewUrl(); } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 95d99ce..dada899 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -109,6 +109,10 @@ public class HttpClientDownloader extends AbstractDownloader { return null; } finally { request.putExtra(Request.STATUS_CODE, statusCode); + if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) { + site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request + .getExtra(Request.STATUS_CODE)); + } try { if (httpResponse != null) { //ensure the connection is released back to pool