package us.codecraft.webmagic.scheduler; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPoolConfig; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.schedular.Scheduler; /** * 使用redis管理url,构建一个分布式的爬虫。
* @author yihua.huang@dianping.com
* @date: 13-7-25
* Time: 上午7:07
*/ public class RedisScheduler implements Scheduler{ private JedisPool pool; private static final String QUEUE_PREFIX = "queue_"; private static final String SET_PREFIX = "set_"; public RedisScheduler(String host){ pool = new JedisPool(new JedisPoolConfig(), host); } @Override public synchronized void push(Request request, Task task) { Jedis jedis = pool.getResource(); if (jedis.zrank(SET_PREFIX+task.getUUID(),request.getUrl())==null){ jedis.rpush(QUEUE_PREFIX+task.getUUID(),request.getUrl()); jedis.zadd(SET_PREFIX+task.getUUID(),System.currentTimeMillis(),request.getUrl()); } pool.returnResource(jedis); } @Override public synchronized Request poll(Task task) { Jedis jedis = pool.getResource(); String url = jedis.lpop(QUEUE_PREFIX+task.getUUID()); pool.returnResource(jedis); return new Request(url); } }