From ba69eba669d32fadbbe8b021b85b9b458d2db6aa Mon Sep 17 00:00:00 2001 From: yao Date: Mon, 21 Dec 2020 14:36:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BB=A3=E7=90=86=E6=8E=A5=E5=8F=A3=E7=9A=84?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=8C=E6=8F=90=E4=BE=9B=E5=88=B7=E6=96=B0?= =?UTF-8?q?=E4=BB=A3=E7=90=86API=E3=80=82downloader=20=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E6=97=B6=EF=BC=8C=E6=8F=90=E4=BE=9Brequest,e?= =?UTF-8?q?xception,proxyProvider=E4=B8=89=E4=B8=AA=E5=8F=82=E6=95=B0?= =?UTF-8?q?=EF=BC=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../codecraft/webmagic/downloader/AbstractDownloader.java | 3 ++- .../webmagic/downloader/HttpClientDownloader.java | 2 +- .../java/us/codecraft/webmagic/proxy/ProxyProvider.java | 7 +++++++ .../us/codecraft/webmagic/proxy/SimpleProxyProvider.java | 5 +++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/AbstractDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/AbstractDownloader.java index c27292d..05f5686 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/AbstractDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/AbstractDownloader.java @@ -3,6 +3,7 @@ package us.codecraft.webmagic.downloader; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; +import us.codecraft.webmagic.proxy.ProxyProvider; import us.codecraft.webmagic.selector.Html; /** @@ -38,7 +39,7 @@ public abstract class AbstractDownloader implements Downloader { protected void onSuccess(Request request) { } - protected void onError(Request request) { + protected void onError(Request request, Throwable throwable, ProxyProvider proxyProvider) { } } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java 
b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 24889c8..757cdd3 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -87,7 +87,7 @@ public class HttpClientDownloader extends AbstractDownloader { return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); - onError(request); + onError(request,e,proxyProvider); return page; } finally { if (httpResponse != null) { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java index 0cef4ed..da3bec9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java @@ -19,6 +19,13 @@ public interface ProxyProvider { */ void returnProxy(Proxy proxy, Page page, Task task); + /** + * Notify the provider that its proxy should be refreshed. Proxy IPs are a scarce resource: a provider may keep handing out an IP that has not expired internally but is no longer usable, so this gives callers a way to tell the provider to refresh it. + * + * @param task the download task + */ + void refreshProxy(Task task); + /** * Get a proxy for task by some strategy. * @param task the download task diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java index ddef6a8..fd80b30 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java @@ -30,6 +30,11 @@ public class SimpleProxyProvider implements ProxyProvider { this.pointer = pointer; } + @Override + public void refreshProxy(Task task) { + + } + public static SimpleProxyProvider from(Proxy... proxies) { List proxiesTemp = new ArrayList(proxies.length); for (Proxy proxy : proxies) {