diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java index 1f6657c..9b9740d 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java @@ -1,5 +1,8 @@ package us.codecraft.webmagic; +import java.util.HashMap; +import java.util.Map; + /** * Request对象封装了待抓取的url信息。
* 在PageProcessor中,Request对象可以通过{@link us.codecraft.webmagic.Page#getRequest()} 获取。
@@ -18,26 +21,29 @@ package us.codecraft.webmagic; * String linktext = (String)page.getRequest().getExtra()[0]; * } * + * * @author code4crafter@gmail.com
- * Date: 13-4-21 - * Time: 上午11:37 + * Date: 13-4-21 + * Time: 上午11:37 */ public class Request { private String url; - private Object[] extra; + /** + * 额外参数,可以保存一些需要的上下文信息 + */ + private Map extras = new HashMap(); private double priority; /** * 构建一个request对象 - * @param url 必须参数,待抓取的url - * @param extra 额外参数,可以保存一些需要的上下文信息 + * + * @param url 必须参数,待抓取的url */ - public Request(String url, Object... extra) { + public Request(String url) { this.url = url; - this.extra = extra; } public double getPriority() { @@ -49,16 +55,18 @@ public class Request { return this; } - /** - * 获取预存的对象 - * @return object[] 预存的对象数组 - */ - public Object[] getExtra() { - return extra; + public Object getExtra(String key) { + return extras.get(key); + } + + public Request putExtra(String key,Object value) { + extras.put(key,value); + return this; } /** * 获取待抓取的url + * * @return url 待抓取的url */ public String getUrl() { diff --git a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java index 094295c..8109ad1 100644 --- a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java +++ b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java @@ -33,7 +33,7 @@ public class RedisScheduler implements Scheduler { if (jedis.zrank(SET_PREFIX + task.getUUID(), request.getUrl()) == null) { //使用List保存队列 jedis.rpush(QUEUE_PREFIX + task.getUUID(), request.getUrl()); - jedis.zadd(SET_PREFIX + task.getUUID(), System.currentTimeMillis(), request.getUrl()); + jedis.zadd(SET_PREFIX + task.getUUID(), request.getPriority(), request.getUrl()); } pool.returnResource(jedis); }