diff --git a/pom.xml b/pom.xml
index b692f87..a0c9993 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
7
us.codecraft
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
pom
webmagic-parent
@@ -24,6 +24,11 @@
Yihua huang
code4crafer@gmail.com
+
+ yuany
+ Ligang Yao
+ ligang.yao@answers.com
+
scm:git:git@github.com:code4craft/webmagic.git
diff --git a/release-note.md b/release-note.md
index 9582302..7e8f958 100755
--- a/release-note.md
+++ b/release-note.md
@@ -1,5 +1,15 @@
Release Notes
----
+*2012-8-20* `version:0.2.1`
+
+Added ComboExtractor support for annotations.
+
+Added request priority support (via `PriorityScheduler`).
+
+Completed part of the i18n work (comments and documentation).
+
+
+
*2012-8-9* `version:0.2.0`
此次更新的主题是"方便"(之前的主题是"灵活")。
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index c8d1e73..28d5507 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -5,7 +5,7 @@
us.codecraft
webmagic-parent
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
index fd7f60c..bbea59f 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -31,7 +31,7 @@ public class Request implements Serializable {
* But no scheduler in webmagic supporting priority now (:
*/
@Experimental
- private double priority;
+ private long priority;
public Request() {
}
@@ -40,7 +40,7 @@ public class Request implements Serializable {
this.url = url;
}
- public double getPriority() {
+ public long getPriority() {
return priority;
}
@@ -53,7 +53,7 @@ public class Request implements Serializable {
* @return this
*/
@Experimental
- public Request setPriority(double priority) {
+ public Request setPriority(long priority) {
this.priority = priority;
return this;
}
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 8a5059d..37322f6 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -5,7 +5,7 @@
us.codecraft
webmagic-parent
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
new file mode 100644
index 0000000..7ce44f0
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
@@ -0,0 +1,106 @@
+package us.codecraft.webmagic.scheduler;
+
+import org.apache.http.annotation.ThreadSafe;
+import org.apache.log4j.Logger;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.PriorityBlockingQueue;
+
+/**
+ * Priority scheduler. Requests with a higher priority are polled earlier.
+ * <p>
+ * Requests are kept in three queues: positive priorities, priority zero (plain FIFO)
+ * and negative priorities. {@link #poll(Task)} drains them in that order.
+ * Each URL is accepted only once; later pushes of an already seen URL are ignored.
+ *
+ * @author code4crafter@gmail.com
+ * @since 0.2.1
+ */
+@ThreadSafe
+public class PriorityScheduler implements Scheduler {
+
+    /** Initial capacity of the two priority queues. */
+    public static final int INITIAL_CAPACITY = 5;
+
+    /**
+     * Orders requests from highest to lowest priority. Compares the primitive values
+     * directly, so no {@code Long} is allocated per comparison.
+     */
+    private static final Comparator<Request> HIGHEST_PRIORITY_FIRST = new Comparator<Request>() {
+        @Override
+        public int compare(Request o1, Request o2) {
+            long first = o1.getPriority();
+            long second = o2.getPriority();
+            if (first == second) {
+                return 0;
+            }
+            return first > second ? -1 : 1;
+        }
+    };
+
+    private final Logger logger = Logger.getLogger(getClass());
+
+    /** Requests with priority 0, served in insertion order. */
+    private final BlockingQueue<Request> noPriorityQueue = new LinkedBlockingQueue<Request>();
+
+    /** Requests with a positive priority; always polled before any other queue. */
+    private final PriorityBlockingQueue<Request> priorityQueuePlus =
+            new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, HIGHEST_PRIORITY_FIRST);
+
+    /** Requests with a negative priority; polled only when the other two queues are empty. */
+    private final PriorityBlockingQueue<Request> priorityQueueMinus =
+            new PriorityBlockingQueue<Request>(INITIAL_CAPACITY, HIGHEST_PRIORITY_FIRST);
+
+    /** URLs that have already been pushed; used to drop duplicates. */
+    private final Set<String> urls = new HashSet<String>();
+
+    /**
+     * Queues a request unless its URL has been pushed before.
+     *
+     * @param request request to schedule
+     * @param task    not used by this implementation
+     */
+    @Override
+    public synchronized void push(Request request, Task task) {
+        if (logger.isDebugEnabled()) {
+            logger.debug("push to queue " + request.getUrl());
+        }
+        if (!urls.add(request.getUrl())) {
+            // duplicate URL: already scheduled once, drop it
+            return;
+        }
+        long priority = request.getPriority();
+        if (priority == 0) {
+            noPriorityQueue.add(request);
+        } else if (priority > 0) {
+            priorityQueuePlus.put(request);
+        } else {
+            priorityQueueMinus.put(request);
+        }
+    }
+
+    /**
+     * Returns the next request: positive priorities first, then priority zero,
+     * then negative priorities.
+     *
+     * @param task not used by this implementation
+     * @return the next request, or {@code null} if all queues are empty
+     */
+    @Override
+    public synchronized Request poll(Task task) {
+        Request next = priorityQueuePlus.poll();
+        if (next == null) {
+            next = noPriorityQueue.poll();
+        }
+        if (next == null) {
+            next = priorityQueueMinus.poll();
+        }
+        return next;
+    }
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/PrioritySchedulerTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/PrioritySchedulerTest.java
new file mode 100644
index 0000000..700f454
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/scheduler/PrioritySchedulerTest.java
@@ -0,0 +1,95 @@
+package us.codecraft.webmagic.scheduler;
+
+import org.junit.Assert;
+import org.junit.Test;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Task;
+
+/**
+ * Tests the polling order of {@link PriorityScheduler}.
+ *
+ * @author code4crafter@gmail.com
+ */
+public class PrioritySchedulerTest {
+
+    private PriorityScheduler priorityScheduler = new PriorityScheduler();
+
+    /** Minimal task stub; the scheduler under test does not read it. */
+    private Task task = new Task() {
+        @Override
+        public String getUUID() {
+            return "1";
+        }
+
+        @Override
+        public Site getSite() {
+            return null;
+        }
+    };
+
+    /** Positive priorities come first (highest first), then priority 0, then negative ones. */
+    @Test
+    public void testDifferentPriority() {
+        Request request = new Request("a");
+        request.setPriority(100);
+        priorityScheduler.push(request, task);
+
+        request = new Request("b");
+        request.setPriority(900);
+        priorityScheduler.push(request, task);
+
+        request = new Request("c");
+        priorityScheduler.push(request, task);
+
+        request = new Request("d");
+        request.setPriority(-900);
+        priorityScheduler.push(request, task);
+
+        Request poll = priorityScheduler.poll(task);
+        Assert.assertEquals("b", poll.getUrl());
+        poll = priorityScheduler.poll(task);
+        Assert.assertEquals("a", poll.getUrl());
+        poll = priorityScheduler.poll(task);
+        Assert.assertEquals("c", poll.getUrl());
+        poll = priorityScheduler.poll(task);
+        Assert.assertEquals("d", poll.getUrl());
+        // every request has been handed out, so the scheduler must now be empty
+        Assert.assertNull(priorityScheduler.poll(task));
+    }
+
+    /** Requests without an explicit priority are polled in insertion order. */
+    @Test
+    public void testNoPriority() {
+        Request request = new Request("a");
+        priorityScheduler.push(request, task);
+
+        request = new Request("b");
+        priorityScheduler.push(request, task);
+
+        request = new Request("c");
+        priorityScheduler.push(request, task);
+
+        Request poll = priorityScheduler.poll(task);
+        Assert.assertEquals("a", poll.getUrl());
+
+        poll = priorityScheduler.poll(task);
+        Assert.assertEquals("b", poll.getUrl());
+
+        poll = priorityScheduler.poll(task);
+        Assert.assertEquals("c", poll.getUrl());
+        Assert.assertNull(priorityScheduler.poll(task));
+    }
+
+    /** A URL that was already pushed is ignored on later pushes. */
+    @Test
+    public void testDuplicateUrlIsIgnored() {
+        priorityScheduler.push(new Request("a"), task);
+        priorityScheduler.push(new Request("a").setPriority(100), task);
+
+        Request poll = priorityScheduler.poll(task);
+        Assert.assertEquals("a", poll.getUrl());
+        Assert.assertEquals(0L, poll.getPriority());
+        Assert.assertNull(priorityScheduler.poll(task));
+    }
+}
diff --git a/webmagic-lucene/pom.xml b/webmagic-lucene/pom.xml
index d54d239..97946cc 100644
--- a/webmagic-lucene/pom.xml
+++ b/webmagic-lucene/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 98bc7a2..35ddcaa 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index cfed143..efa8291 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index 4469e3e..43bbcfb 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.2.1-SNAPSHOT
+ 0.2.1
4.0.0