update pom
parent
606417fdc7
commit
7003426898
7
pom.xml
7
pom.xml
|
@ -8,7 +8,7 @@
|
||||||
<version>7</version>
|
<version>7</version>
|
||||||
</parent>
|
</parent>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
|
@ -24,6 +24,11 @@
|
||||||
<name>Yihua huang</name>
|
<name>Yihua huang</name>
|
||||||
<email>code4crafer@gmail.com</email>
|
<email>code4crafer@gmail.com</email>
|
||||||
</developer>
|
</developer>
|
||||||
|
<developer>
|
||||||
|
<id>yuany</id>
|
||||||
|
<name>Ligang Yao</name>
|
||||||
|
<email>ligang.yao@answers.com</email>
|
||||||
|
</developer>
|
||||||
</developers>
|
</developers>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>scm:git:git@github.com:code4craft/webmagic.git</connection>
|
<connection>scm:git:git@github.com:code4craft/webmagic.git</connection>
|
||||||
|
|
|
@ -1,5 +1,15 @@
|
||||||
Release Notes
|
Release Notes
|
||||||
----
|
----
|
||||||
|
*2012-8-20* `version:0.2.1`
|
||||||
|
|
||||||
|
ComboExtractor support for annotation.
|
||||||
|
|
||||||
|
Request priority support (using `PriorityScheduler`).
|
||||||
|
|
||||||
|
Completed some i18n work (comments and documentation).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
*2012-8-9* `version:0.2.0`
|
*2012-8-9* `version:0.2.0`
|
||||||
|
|
||||||
此次更新的主题是"方便"(之前的主题是"灵活")。
|
此次更新的主题是"方便"(之前的主题是"灵活")。
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class Request implements Serializable {
|
||||||
* But no scheduler in webmagic supporting priority now (:
|
* But no scheduler in webmagic supporting priority now (:
|
||||||
*/
|
*/
|
||||||
@Experimental
|
@Experimental
|
||||||
private double priority;
|
private long priority;
|
||||||
|
|
||||||
public Request() {
|
public Request() {
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,7 @@ public class Request implements Serializable {
|
||||||
this.url = url;
|
this.url = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getPriority() {
|
public long getPriority() {
|
||||||
return priority;
|
return priority;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ public class Request implements Serializable {
|
||||||
* @return this
|
* @return this
|
||||||
*/
|
*/
|
||||||
@Experimental
|
@Experimental
|
||||||
public Request setPriority(double priority) {
|
public Request setPriority(long priority) {
|
||||||
this.priority = priority;
|
this.priority = priority;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
package us.codecraft.webmagic.scheduler;
|
||||||
|
|
||||||
|
import org.apache.http.annotation.ThreadSafe;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.Task;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import java.util.concurrent.PriorityBlockingQueue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Priority scheduler. Request with higher priority will poll earlier. <br>
|
||||||
|
*
|
||||||
|
* @author code4crafter@gmail.com <br>
|
||||||
|
* @since 0.2.1
|
||||||
|
*/
|
||||||
|
@ThreadSafe
|
||||||
|
public class PriorityScheduler implements Scheduler {
|
||||||
|
|
||||||
|
public static final int INITIAL_CAPACITY = 5;
|
||||||
|
|
||||||
|
private Logger logger = Logger.getLogger(getClass());
|
||||||
|
|
||||||
|
private BlockingQueue<Request> noPriorityQueue = new LinkedBlockingQueue<Request>();
|
||||||
|
|
||||||
|
private PriorityBlockingQueue<Request> priorityQueuePlus = new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, new Comparator<Request>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Request o1, Request o2) {
|
||||||
|
return -(new Long(o1.getPriority()).compareTo(o2.getPriority()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
private PriorityBlockingQueue<Request> priorityQueueMinus = new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, new Comparator<Request>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Request o1, Request o2) {
|
||||||
|
return -(new Long(o1.getPriority()).compareTo(o2.getPriority()));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
private Set<String> urls = new HashSet<String>();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void push(Request request, Task task) {
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("push to queue " + request.getUrl());
|
||||||
|
}
|
||||||
|
if (urls.add(request.getUrl())) {
|
||||||
|
if (request.getPriority() == 0) {
|
||||||
|
noPriorityQueue.add(request);
|
||||||
|
} else if (request.getPriority() > 0) {
|
||||||
|
priorityQueuePlus.put(request);
|
||||||
|
} else {
|
||||||
|
priorityQueueMinus.put(request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized Request poll(Task task) {
|
||||||
|
Request poll = priorityQueuePlus.poll();
|
||||||
|
if (poll != null) {
|
||||||
|
return poll;
|
||||||
|
}
|
||||||
|
poll = noPriorityQueue.poll();
|
||||||
|
if (poll != null) {
|
||||||
|
return poll;
|
||||||
|
}
|
||||||
|
return priorityQueueMinus.poll();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,75 @@
|
||||||
|
package us.codecraft.webmagic.scheduler;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.Task;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com <br>
|
||||||
|
*/
|
||||||
|
public class PrioritySchedulerTest {
|
||||||
|
|
||||||
|
private PriorityScheduler priorityScheduler = new PriorityScheduler();
|
||||||
|
|
||||||
|
private Task task = new Task() {
|
||||||
|
@Override
|
||||||
|
public String getUUID() {
|
||||||
|
return "1";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Site getSite() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDifferentPriority() {
|
||||||
|
Request request = new Request("a");
|
||||||
|
request.setPriority(100);
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
request = new Request("b");
|
||||||
|
request.setPriority(900);
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
request = new Request("c");
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
request = new Request("d");
|
||||||
|
request.setPriority(-900);
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
Request poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("b",poll.getUrl());
|
||||||
|
poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("a",poll.getUrl());
|
||||||
|
poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("c",poll.getUrl());
|
||||||
|
poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("d",poll.getUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoPriority() {
|
||||||
|
Request request = new Request("a");
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
request = new Request("b");
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
request = new Request("c");
|
||||||
|
priorityScheduler.push(request,task);
|
||||||
|
|
||||||
|
Request poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("a",poll.getUrl());
|
||||||
|
|
||||||
|
poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("b",poll.getUrl());
|
||||||
|
|
||||||
|
poll = priorityScheduler.poll(task);
|
||||||
|
Assert.assertEquals("c",poll.getUrl());
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<version>0.2.1-SNAPSHOT</version>
|
<version>0.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue