Skip the duplicate check for POST requests (#484)
parent
45bf2b6fd7
commit
5215a492cc
|
@ -6,6 +6,7 @@ import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
|
||||||
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
|
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
|
||||||
|
import us.codecraft.webmagic.utils.HttpConstant;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove duplicate urls and only push urls which are not duplicate.<br><br>
|
* Remove duplicate urls and only push urls which are not duplicate.<br><br>
|
||||||
|
@ -31,7 +32,7 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
|
||||||
@Override
|
@Override
|
||||||
public void push(Request request, Task task) {
|
public void push(Request request, Task task) {
|
||||||
logger.trace("get a candidate url {}", request.getUrl());
|
logger.trace("get a candidate url {}", request.getUrl());
|
||||||
if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request)) {
|
if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request) || noNeedToRemoveDuplicate(request)) {
|
||||||
logger.debug("push to queue {}", request.getUrl());
|
logger.debug("push to queue {}", request.getUrl());
|
||||||
pushWhenNoDuplicate(request, task);
|
pushWhenNoDuplicate(request, task);
|
||||||
}
|
}
|
||||||
|
@ -41,6 +42,10 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
|
||||||
return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
|
return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected boolean noNeedToRemoveDuplicate(Request request) {
|
||||||
|
return HttpConstant.Method.POST.equalsIgnoreCase(request.getMethod());
|
||||||
|
}
|
||||||
|
|
||||||
protected void pushWhenNoDuplicate(Request request, Task task) {
|
protected void pushWhenNoDuplicate(Request request, Task task) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue