parent
3f756c9325
commit
4bedd97267
|
@ -426,7 +426,7 @@ public class Spider implements Runnable, Task {
|
||||||
}
|
}
|
||||||
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
|
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
|
||||||
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
|
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
|
||||||
failHandler(request);
|
downloader.refreshComponent(this);
|
||||||
}else {
|
}else {
|
||||||
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
|
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
|
||||||
}
|
}
|
||||||
|
@ -435,11 +435,6 @@ public class Spider implements Runnable, Task {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void onDownloaderFail(Request request) {
|
private void onDownloaderFail(Request request) {
|
||||||
failHandler(request);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void failHandler(Request request){
|
|
||||||
downloader.refreshComponent(this);
|
|
||||||
if (site.getCycleRetryTimes() == 0) {
|
if (site.getCycleRetryTimes() == 0) {
|
||||||
sleep(site.getSleepTime());
|
sleep(site.getSleepTime());
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -54,7 +54,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
this.refreshClientOnError = clientOnError;
|
this.refreshClientOnError = clientOnError;
|
||||||
}
|
}
|
||||||
public void setRefreshProxyOnError(Predicate<Throwable> proxyOnError) {
|
public void setRefreshProxyOnError(Predicate<Throwable> proxyOnError) {
|
||||||
this.refreshProxyOnError = proxyOnError;
|
this.refreshProxyOnError = refreshProxyOnError;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
|
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
|
||||||
|
@ -94,7 +94,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
logger.warn("download page {} error", request.getUrl(), e);
|
logger.warn("download page {} error", request.getUrl(), e);
|
||||||
onError(request, e, proxyProvider);
|
onError(request, e, proxyProvider);
|
||||||
if (proxyProvider != null && refreshProxyOnError.test(e)) {
|
if (proxyProvider != null && refreshProxyOnError.test(e)) {
|
||||||
proxyProvider.refreshProxy(task,proxy);
|
proxyProvider.refreshProxy(task);
|
||||||
}
|
}
|
||||||
if(refreshClientOnError.test(e)) {
|
if(refreshClientOnError.test(e)) {
|
||||||
httpClients.remove(task.getSite().getDomain());
|
httpClients.remove(task.getSite().getDomain());
|
||||||
|
@ -115,7 +115,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
@Override
|
@Override
|
||||||
public void refreshComponent(Task task) {
|
public void refreshComponent(Task task) {
|
||||||
if (proxyProvider != null ) {
|
if (proxyProvider != null ) {
|
||||||
proxyProvider.refreshProxy(task,proxyProvider.getCurrentProxy(task));
|
proxyProvider.refreshProxy(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
httpClients.remove(task.getSite().getDomain());
|
httpClients.remove(task.getSite().getDomain());
|
||||||
|
|
|
@ -143,7 +143,6 @@ public class HttpClientGenerator {
|
||||||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||||
socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
|
socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
|
||||||
socketConfigBuilder.setSoTimeout(site.getTimeOut());
|
socketConfigBuilder.setSoTimeout(site.getTimeOut());
|
||||||
|
|
||||||
SocketConfig socketConfig = socketConfigBuilder.build();
|
SocketConfig socketConfig = socketConfigBuilder.build();
|
||||||
httpClientBuilder.setDefaultSocketConfig(socketConfig);
|
httpClientBuilder.setDefaultSocketConfig(socketConfig);
|
||||||
connectionManager.setDefaultSocketConfig(socketConfig);
|
connectionManager.setDefaultSocketConfig(socketConfig);
|
||||||
|
|
|
@ -23,19 +23,8 @@ public interface ProxyProvider {
|
||||||
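* Proxy IPs are a precious resource. A provider may keep handing out the same IP because it has not expired internally, even though that IP can no longer be used, so this method gives callers a way to notify the provider that the proxy should be refreshed.
|
* Proxy IPs are a precious resource. A provider may keep handing out the same IP because it has not expired internally, even though that IP can no longer be used, so this method gives callers a way to notify the provider that the proxy should be refreshed.
|
||||||
*
|
*
|
||||||
* @param task the download task
|
* @param task the download task
|
||||||
* @param proxy the proxy to verify: refresh only if the provider really is still holding this faulty proxy, otherwise carry on unchanged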
|
|
||||||
*/
|
*/
|
||||||
void refreshProxy(Task task,Proxy proxy);
|
void refreshProxy(Task task);
|
||||||
|
|
||||||
|
|
||||||
/**
|

||||||
*
|

||||||
* Gets the proxy that is currently being provided.
|

||||||
*
|

||||||
* @param task the download task
|

||||||
* @return the proxy currently being provided; implementations may return null
|

||||||
*/
|

||||||
Proxy getCurrentProxy(Task task);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a proxy for task by some strategy.
|
* Get a proxy for task by some strategy.
|
||||||
|
|
|
@ -31,12 +31,7 @@ public class SimpleProxyProvider implements ProxyProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Proxy getCurrentProxy(Task task) {
|
public void refreshProxy(Task task) {
|
||||||
return null; // always null in this implementation
|
// no-op: this implementation does nothing on refresh
||||||
}
|

||||||

|
||||||
@Override
|

||||||
public void refreshProxy(Task task,Proxy proxy) {
|

||||||
// no-op: this implementation does nothing on refresh
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue