diff --git a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
index 481981d..575beef 100644
--- a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+++ b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -7,13 +7,17 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.schedular.Scheduler;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+
/**
* 使用redis管理url,构建一个分布式的爬虫。
+ *
* @author yihua.huang@dianping.com
* @date: 13-7-25
* Time: 上午7:07
*/
-public class RedisScheduler implements Scheduler{
+public class RedisScheduler implements Scheduler {
private JedisPool pool;
@@ -21,7 +25,11 @@ public class RedisScheduler implements Scheduler{
private static final String SET_PREFIX = "set_";
- public RedisScheduler(String host){
+ // push() signals this condition after queuing a URL; poll() awaits it. Both are final so every thread shares one lock.
+ private final ReentrantLock lock = new ReentrantLock();
+ private final Condition condition = lock.newCondition();
+
+ public RedisScheduler(String host) {
pool = new JedisPool(new JedisPoolConfig(), host);
}
@@ -29,10 +37,16 @@ public class RedisScheduler implements Scheduler{
- public synchronized void push(Request request, Task task) {
+ public void push(Request request, Task task) {
Jedis jedis = pool.getResource();
- //使用SortedSet进行url去重
- if (jedis.zrank(SET_PREFIX+task.getUUID(),request.getUrl())==null){
- //使用List保存队列
- jedis.rpush(QUEUE_PREFIX+task.getUUID(),request.getUrl());
- jedis.zadd(SET_PREFIX+task.getUUID(),System.currentTimeMillis(),request.getUrl());
+ // The lock (not the object monitor) makes the dedupe check and the enqueue one step, so a waiting poll() never blocks push().
+ lock.lock();
+ try {
+ if (jedis.zrank(SET_PREFIX + task.getUUID(), request.getUrl()) == null) {
+ // URL not in the sorted set yet: append it to the list used as the queue, record it in the set, wake one waiting poll().
+ jedis.rpush(QUEUE_PREFIX + task.getUUID(), request.getUrl());
+ jedis.zadd(SET_PREFIX + task.getUUID(), System.currentTimeMillis(), request.getUrl());
+ condition.signal();
+ }
+ } finally {
+ lock.unlock();
+ pool.returnResource(jedis);
}
- pool.returnResource(jedis);
}
@@ -40,7 +54,27 @@ public class RedisScheduler implements Scheduler{
@Override
- public synchronized Request poll(Task task) {
+ public Request poll(Task task) {
+ // Blocks until a URL can be popped from the task's queue; returns null if the thread is interrupted while waiting.
Jedis jedis = pool.getResource();
- String url = jedis.lpop(QUEUE_PREFIX+task.getUUID());
+ try {
+ String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
+ if (url == null) {
+ lock.lock();
+ try {
+ // Re-check under the lock so a push() that ran between the pop above and lock() is not missed.
+ while ((url = jedis.lpop(QUEUE_PREFIX + task.getUUID())) == null) {
+ // Timed wait (one second): URLs queued by another process never signal this JVM's condition.
+ condition.awaitNanos(1000000000L);
+ }
+ } catch (InterruptedException e) {
+ // Restore the interrupt flag and stop waiting instead of swallowing the interrupt and looping forever.
+ Thread.currentThread().interrupt();
+ return null;
+ } finally {
+ lock.unlock();
+ }
+ }
+ return new Request(url);
+ } finally {
pool.returnResource(jedis);
- return new Request(url);
}
+ }
diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/SeleniumTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/SeleniumTest.java
index 6cf50c3..a403b91 100644
--- a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/SeleniumTest.java
+++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/SeleniumTest.java
@@ -6,6 +6,11 @@ import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.remote.DesiredCapabilities;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
/**
* @author yihua.huang@dianping.com
@@ -18,7 +23,16 @@ public class SeleniumTest {
@Test
public void testSelenium() {
System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
- WebDriver webDriver = new ChromeDriver();
+ Map contentSettings = new HashMap();
+ contentSettings.put("images", 2);
+
+ Map preferences = new HashMap();
+ preferences.put("profile.default_content_settings", contentSettings);
+
+ DesiredCapabilities caps = DesiredCapabilities.chrome();
+ caps.setCapability("chrome.prefs", preferences);
+ caps.setCapability("chrome.switches", Arrays.asList("--user-data-dir=/Users/yihua/temp/chrome"));
+ WebDriver webDriver = new ChromeDriver(caps);
webDriver.get("http://huaban.com/");
WebElement webElement = webDriver.findElement(By.xpath("/html"));
System.out.println(webElement.getAttribute("outerHTML"));