diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
index 6b7ebae..ecbeecb 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
@@ -32,7 +32,9 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
     @Override
     public void push(Request request, Task task) {
         logger.trace("get a candidate url {}", request.getUrl());
-        if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request) || noNeedToRemoveDuplicate(request)) {
+        // Evaluate the bypass checks first: || short-circuits, so isDuplicate is never
+        // invoked for requests accepted by shouldReserved or noNeedToRemoveDuplicate.
+        if (shouldReserved(request) || noNeedToRemoveDuplicate(request) || !duplicatedRemover.isDuplicate(request, task)) {
             logger.debug("push to queue {}", request.getUrl());
             pushWhenNoDuplicate(request, task);
         }
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java
new file mode 100644
index 0000000..da69129
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java
@@ -0,0 +1,48 @@
+package us.codecraft.webmagic.scheduler;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mockito;
+import org.mockito.runners.MockitoJUnitRunner;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
+import us.codecraft.webmagic.utils.HttpConstant;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Tests for the push logic of {@link DuplicateRemovedScheduler}.
+ *
+ * @author code4crafter@gmail.com
+ * Date: 17/3/11
+ * Time: 11:26 AM
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class DuplicateRemovedSchedulerTest {
+
+    /**
+     * Pushing a POST request must not call {@link DuplicateRemover#isDuplicate} at all.
+     */
+    @Test
+    public void test_no_duplicate_removed_for_post_request() throws Exception {
+        // poll() is irrelevant to this test; a stub keeps the abstract class instantiable.
+        DuplicateRemovedScheduler duplicateRemovedScheduler = new DuplicateRemovedScheduler() {
+            @Override
+            public Request poll(Task task) {
+                return null;
+            }
+        };
+        DuplicateRemover duplicateRemover = Mockito.mock(DuplicateRemover.class);
+        duplicateRemovedScheduler.setDuplicateRemover(duplicateRemover);
+        Request request = new Request("https://www.google.com/");
+        request.setMethod(HttpConstant.Method.POST);
+        // Push with a non-null Task: on Mockito 2+ any(Task.class) does not match null,
+        // which would make the never() verification below pass vacuously.
+        Task task = Mockito.mock(Task.class);
+        duplicateRemovedScheduler.push(request, task);
+        verify(duplicateRemover, never()).isDuplicate(any(Request.class), any(Task.class));
+    }
+}
diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/samples/scheduler/DuplicateRemovedSchedulerTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/samples/scheduler/DuplicateRemovedSchedulerTest.java
deleted file mode 100644
index 6f7a5d1..0000000
--- a/webmagic-samples/src/test/java/us/codecraft/webmagic/samples/scheduler/DuplicateRemovedSchedulerTest.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package us.codecraft.webmagic.samples.scheduler;
-
-import org.junit.Test;
-
-/**
- * @author code4crafter@gmail.com
- * Date: 17/3/11
- * Time: 上午11:26
- */
-public class DuplicateRemovedSchedulerTest {
-
-    @Test
-    public void testDuplicateRemoved() throws Exception {
-
-
-    }
-}