From 4a035e729a52432bca196dd2d1e3d305888b3468 Mon Sep 17 00:00:00 2001
From: "yihua.huang"
Date: Sun, 13 Apr 2014 23:31:13 +0800
Subject: [PATCH] extension point for LocalDuplicatedRemovedScheduler #95

---
 .../webmagic/scheduler/LocalDuplicatedRemovedScheduler.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
index 397199c..449c3f6 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
@@ -24,11 +24,15 @@ public abstract class LocalDuplicatedRemovedScheduler implements Scheduler {
     @Override
     public void push(Request request, Task task) {
         logger.trace("get a candidate url {}", request.getUrl());
-        if (request.getExtra(Request.CYCLE_TRIED_TIMES) != null || urls.add(request.getUrl())) {
+        if (urls.add(request.getUrl()) || shouldReserved(request)) {
             logger.debug("push to queue {}", request.getUrl());
             pushWhenNoDuplicate(request, task);
         }
     }
 
+    protected boolean shouldReserved(Request request) {
+        return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
+    }
+
     protected abstract void pushWhenNoDuplicate(Request request, Task task);
 }