刷新代理API重构,需要提供旧代理,如果依然是旧代理,才进行刷新,防止因延迟响应造成的过度刷新
parent
2e2a0fdf3e
commit
0aa2c3949d
|
@ -421,7 +421,7 @@ public class Spider implements Runnable, Task {
|
|||
}
|
||||
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
|
||||
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
|
||||
downloader.refreshComponent(this);
|
||||
failHandler(request);
|
||||
}else {
|
||||
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
|
||||
}
|
||||
|
@ -430,6 +430,11 @@ public class Spider implements Runnable, Task {
|
|||
}
|
||||
|
||||
private void onDownloaderFail(Request request) {
|
||||
failHandler(request);
|
||||
}
|
||||
|
||||
private void failHandler(Request request){
|
||||
downloader.refreshComponent(this);
|
||||
if (site.getCycleRetryTimes() == 0) {
|
||||
sleep(site.getSleepTime());
|
||||
} else {
|
||||
|
|
|
@ -54,7 +54,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
this.refreshClientOnError = clientOnError;
|
||||
}
|
||||
public void setRefreshProxyOnError(Predicate<Throwable> proxyOnError) {
|
||||
this.refreshProxyOnError = refreshProxyOnError;
|
||||
this.refreshProxyOnError = proxyOnError;
|
||||
}
|
||||
|
||||
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
|
||||
|
@ -94,7 +94,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
logger.warn("download page {} error", request.getUrl(), e);
|
||||
onError(request, e, proxyProvider);
|
||||
if (proxyProvider != null && refreshProxyOnError.test(e)) {
|
||||
proxyProvider.refreshProxy(task);
|
||||
proxyProvider.refreshProxy(task,proxy);
|
||||
}
|
||||
if(refreshClientOnError.test(e)) {
|
||||
httpClients.remove(task.getSite().getDomain());
|
||||
|
@ -115,7 +115,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
|||
@Override
|
||||
public void refreshComponent(Task task) {
|
||||
if (proxyProvider != null ) {
|
||||
proxyProvider.refreshProxy(task);
|
||||
proxyProvider.refreshProxy(task,proxyProvider.getCurrentProxy(task));
|
||||
}
|
||||
|
||||
httpClients.remove(task.getSite().getDomain());
|
||||
|
|
|
@ -143,6 +143,7 @@ public class HttpClientGenerator {
|
|||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||
socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
|
||||
socketConfigBuilder.setSoTimeout(site.getTimeOut());
|
||||
|
||||
SocketConfig socketConfig = socketConfigBuilder.build();
|
||||
httpClientBuilder.setDefaultSocketConfig(socketConfig);
|
||||
connectionManager.setDefaultSocketConfig(socketConfig);
|
||||
|
|
|
@ -23,8 +23,19 @@ public interface ProxyProvider {
|
|||
* 代理IP是珍贵资源,有可能代理提供者内部代理没有过期,就一直提供某个IP,但这个IP又不可以使用,所以提供一种方式通知提供者,这个代理该刷新了
|
||||
*
|
||||
* @param task 下载任务
|
||||
* @param proxy 需要对代理进行验证,如果确实持有的是错误代理,则刷新,否则,继续执行
|
||||
*/
|
||||
void refreshProxy(Task task);
|
||||
void refreshProxy(Task task,Proxy proxy);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* 获取当前正在提供的代理
|
||||
*
|
||||
* @param task
|
||||
* @return
|
||||
*/
|
||||
Proxy getCurrentProxy(Task task);
|
||||
|
||||
/**
|
||||
* Get a proxy for task by some strategy.
|
||||
|
|
|
@ -31,7 +31,12 @@ public class SimpleProxyProvider implements ProxyProvider {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void refreshProxy(Task task) {
|
||||
public Proxy getCurrentProxy(Task task) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refreshProxy(Task task,Proxy proxy) {
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue