update pom
parent
606417fdc7
commit
7003426898
7
pom.xml
7
pom.xml
|
@ -8,7 +8,7 @@
|
|||
<version>7</version>
|
||||
</parent>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<packaging>pom</packaging>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
|
@ -24,6 +24,11 @@
|
|||
<name>Yihua huang</name>
|
||||
<email>code4crafter@gmail.com</email>
|
||||
</developer>
|
||||
<developer>
|
||||
<id>yuany</id>
|
||||
<name>Ligang Yao</name>
|
||||
<email>ligang.yao@answers.com</email>
|
||||
</developer>
|
||||
</developers>
|
||||
<scm>
|
||||
<connection>scm:git:git@github.com:code4craft/webmagic.git</connection>
|
||||
|
|
|
@ -1,5 +1,15 @@
|
|||
Release Notes
|
||||
----
|
||||
*2012-8-20* `version:0.2.1`
|
||||
|
||||
ComboExtractor support for annotation.
|
||||
|
||||
Request priority support (using `PriorityScheduler`).
|
||||
|
||||
Complete some I18n work (comments and documents).
|
||||
|
||||
|
||||
|
||||
*2012-8-9* `version:0.2.0`
|
||||
|
||||
此次更新的主题是"方便"(之前的主题是"灵活")。
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ public class Request implements Serializable {
|
|||
* Use {@code PriorityScheduler} (since 0.2.1) to have requests polled by priority.
|
||||
*/
|
||||
@Experimental
|
||||
private double priority;
|
||||
private long priority;
|
||||
|
||||
public Request() {
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ public class Request implements Serializable {
|
|||
this.url = url;
|
||||
}
|
||||
|
||||
public double getPriority() {
|
||||
public long getPriority() {
|
||||
return priority;
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ public class Request implements Serializable {
|
|||
* @return this
|
||||
*/
|
||||
@Experimental
|
||||
public Request setPriority(double priority) {
|
||||
public Request setPriority(long priority) {
|
||||
this.priority = priority;
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
package us.codecraft.webmagic.scheduler;
|
||||
|
||||
import org.apache.http.annotation.ThreadSafe;
|
||||
import org.apache.log4j.Logger;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
/**
|
||||
* Priority scheduler. Request with higher priority will poll earlier. <br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.1
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class PriorityScheduler implements Scheduler {
|
||||
|
||||
public static final int INITIAL_CAPACITY = 5;
|
||||
|
||||
private Logger logger = Logger.getLogger(getClass());
|
||||
|
||||
private BlockingQueue<Request> noPriorityQueue = new LinkedBlockingQueue<Request>();
|
||||
|
||||
private PriorityBlockingQueue<Request> priorityQueuePlus = new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, new Comparator<Request>() {
|
||||
@Override
|
||||
public int compare(Request o1, Request o2) {
|
||||
return -(new Long(o1.getPriority()).compareTo(o2.getPriority()));
|
||||
}
|
||||
});
|
||||
|
||||
private PriorityBlockingQueue<Request> priorityQueueMinus = new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, new Comparator<Request>() {
|
||||
@Override
|
||||
public int compare(Request o1, Request o2) {
|
||||
return -(new Long(o1.getPriority()).compareTo(o2.getPriority()));
|
||||
}
|
||||
});
|
||||
|
||||
private Set<String> urls = new HashSet<String>();
|
||||
|
||||
@Override
|
||||
public synchronized void push(Request request, Task task) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("push to queue " + request.getUrl());
|
||||
}
|
||||
if (urls.add(request.getUrl())) {
|
||||
if (request.getPriority() == 0) {
|
||||
noPriorityQueue.add(request);
|
||||
} else if (request.getPriority() > 0) {
|
||||
priorityQueuePlus.put(request);
|
||||
} else {
|
||||
priorityQueueMinus.put(request);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Request poll(Task task) {
|
||||
Request poll = priorityQueuePlus.poll();
|
||||
if (poll != null) {
|
||||
return poll;
|
||||
}
|
||||
poll = noPriorityQueue.poll();
|
||||
if (poll != null) {
|
||||
return poll;
|
||||
}
|
||||
return priorityQueueMinus.poll();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package us.codecraft.webmagic.scheduler;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br>
|
||||
*/
|
||||
public class PrioritySchedulerTest {
|
||||
|
||||
private PriorityScheduler priorityScheduler = new PriorityScheduler();
|
||||
|
||||
private Task task = new Task() {
|
||||
@Override
|
||||
public String getUUID() {
|
||||
return "1";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
@Test
|
||||
public void testDifferentPriority() {
|
||||
Request request = new Request("a");
|
||||
request.setPriority(100);
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
request = new Request("b");
|
||||
request.setPriority(900);
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
request = new Request("c");
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
request = new Request("d");
|
||||
request.setPriority(-900);
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
Request poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("b",poll.getUrl());
|
||||
poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("a",poll.getUrl());
|
||||
poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("c",poll.getUrl());
|
||||
poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("d",poll.getUrl());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoPriority() {
|
||||
Request request = new Request("a");
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
request = new Request("b");
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
request = new Request("c");
|
||||
priorityScheduler.push(request,task);
|
||||
|
||||
Request poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("a",poll.getUrl());
|
||||
|
||||
poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("b",poll.getUrl());
|
||||
|
||||
poll = priorityScheduler.poll(task);
|
||||
Assert.assertEquals("c",poll.getUrl());
|
||||
}
|
||||
}
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.2.1-SNAPSHOT</version>
|
||||
<version>0.2.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
Loading…
Reference in New Issue