Fix RedisScheduler thread problem
parent
95ba784685
commit
2c97dd90c7
|
@ -7,13 +7,17 @@ import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.schedular.Scheduler;
|
import us.codecraft.webmagic.schedular.Scheduler;
|
||||||
|
|
||||||
|
import java.util.concurrent.locks.Condition;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 使用redis管理url,构建一个分布式的爬虫。<br>
|
* 使用redis管理url,构建一个分布式的爬虫。<br>
|
||||||
|
*
|
||||||
* @author yihua.huang@dianping.com <br>
|
* @author yihua.huang@dianping.com <br>
|
||||||
* @date: 13-7-25 <br>
|
* @date: 13-7-25 <br>
|
||||||
* Time: 上午7:07 <br>
|
* Time: 上午7:07 <br>
|
||||||
*/
|
*/
|
||||||
public class RedisScheduler implements Scheduler{
|
public class RedisScheduler implements Scheduler {
|
||||||
|
|
||||||
// Jedis connection pool; assigned in the constructor and borrowed from via pool.getResource() in push() and poll().
private JedisPool pool;
|
private JedisPool pool;
|
||||||
|
|
||||||
|
@ -21,7 +25,11 @@ public class RedisScheduler implements Scheduler{
|
||||||
|
|
||||||
// Key prefix of the Redis sorted set used for URL de-duplication; push() appends task.getUUID() to it.
private static final String SET_PREFIX = "set_";
|
private static final String SET_PREFIX = "set_";
|
||||||
|
|
||||||
public RedisScheduler(String host){
|
// Lock held around the queue writes in push() and around the wait loop in poll().
private ReentrantLock lock = new ReentrantLock();
|
||||||
|
|
||||||
|
// Condition tied to lock: poll() awaits on it when lpop returns null, push() signals it after adding a URL.
private Condition condition = lock.newCondition();
|
||||||
|
|
||||||
|
/**
 * Creates a scheduler whose Redis connections come from a JedisPool.
 *
 * @param host passed to the JedisPool constructor together with a default JedisPoolConfig
 */
public RedisScheduler(String host) {
|
||||||
pool = new JedisPool(new JedisPoolConfig(), host);
|
pool = new JedisPool(new JedisPoolConfig(), host);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,10 +37,16 @@ public class RedisScheduler implements Scheduler{
|
||||||
/**
 * Queues a request's URL for the given task unless the URL has been seen before.
 * <p>
 * The URL is looked up with zrank in the sorted set keyed SET_PREFIX + task.getUUID();
 * only when that returns null is it appended (rpush) to the list keyed
 * QUEUE_PREFIX + task.getUUID() and recorded (zadd) in the sorted set with
 * System.currentTimeMillis() as its score. In the variant that uses {@code lock},
 * both writes happen while the lock is held and are followed by condition.signal().
 * <p>
 * NOTE(review): this method is synchronized, and so is poll(). condition.await() in
 * poll() releases only {@code lock}, not the object monitor, so a push() on the same
 * instance cannot start while a poll() is waiting and therefore cannot send the signal.
 * NOTE(review): pool.returnResource(jedis) is reached only on the normal path; an
 * exception from zrank/rpush/zadd would leave the borrowed connection un-returned.
 *
 * @param request supplies the URL via request.getUrl()
 * @param task    supplies the key suffix via task.getUUID()
 */
public synchronized void push(Request request, Task task) {
|
public synchronized void push(Request request, Task task) {
|
||||||
Jedis jedis = pool.getResource();
|
Jedis jedis = pool.getResource();
|
||||||
// Use a SortedSet to de-duplicate URLs
|
// Use a SortedSet to de-duplicate URLs
|
||||||
if (jedis.zrank(SET_PREFIX+task.getUUID(),request.getUrl())==null){
|
if (jedis.zrank(SET_PREFIX + task.getUUID(), request.getUrl()) == null) {
|
||||||
// Use a List to hold the queue
|
// NOTE(review): lock.lock() sits inside this try; if it ever failed, the finally would call unlock() without holding the lock.
try {
|
||||||
jedis.rpush(QUEUE_PREFIX+task.getUUID(),request.getUrl());
|
lock.lock();
|
||||||
jedis.zadd(SET_PREFIX+task.getUUID(),System.currentTimeMillis(),request.getUrl());
|
// Use a List to hold the queue
|
||||||
|
jedis.rpush(QUEUE_PREFIX + task.getUUID(), request.getUrl());
|
||||||
|
jedis.zadd(SET_PREFIX + task.getUUID(), System.currentTimeMillis(), request.getUrl());
|
||||||
|
// Wakes one thread waiting on the condition in poll() so it can retry lpop.
condition.signal();
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pool.returnResource(jedis);
|
pool.returnResource(jedis);
|
||||||
}
|
}
|
||||||
|
@ -40,7 +54,21 @@ public class RedisScheduler implements Scheduler{
|
||||||
/**
 * Removes and returns the next queued URL for the given task, wrapped in a Request.
 * <p>
 * The first lpop on QUEUE_PREFIX + task.getUUID() is unconditional. In the variant
 * without the wait loop a null result goes straight into new Request(url); in the
 * variant with the loop the method waits on {@code condition} and retries lpop after
 * each wake-up until a non-null URL is obtained.
 * <p>
 * NOTE(review): this method is synchronized, and condition.await() releases only
 * {@code lock}. The object monitor stays held for the whole wait, so the synchronized
 * push() on the same instance cannot run to signal the condition.
 * NOTE(review): the connection borrowed from the pool is kept for the whole wait and
 * is returned only on the normal path (no finally around pool.returnResource).
 * NOTE(review): presumably other processes can also push to the same Redis list; they
 * would not signal this in-process condition - confirm against the deployment.
 *
 * @param task supplies the key suffix via task.getUUID()
 * @return a new Request built from the popped URL
 */
@Override
|
@Override
|
||||||
public synchronized Request poll(Task task) {
|
public synchronized Request poll(Task task) {
|
||||||
Jedis jedis = pool.getResource();
|
Jedis jedis = pool.getResource();
|
||||||
String url = jedis.lpop(QUEUE_PREFIX+task.getUUID());
|
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
|
||||||
|
// Queue was empty on the first attempt: wait for push() to signal and try again.
if (url == null) {
|
||||||
|
// NOTE(review): lock.lock() sits inside this try; if it ever failed, the finally would call unlock() without holding the lock.
try {
|
||||||
|
lock.lock();
|
||||||
|
// Loop rather than a single wait: lpop can still return null after a wake-up.
while (url == null) {
|
||||||
|
try {
|
||||||
|
condition.await();
|
||||||
|
url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
|
||||||
|
// NOTE(review): the interruption is swallowed - the catch body is empty and the loop keeps waiting.
} catch (InterruptedException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
pool.returnResource(jedis);
|
pool.returnResource(jedis);
|
||||||
return new Request(url);
|
return new Request(url);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,11 @@ import org.openqa.selenium.By;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.WebElement;
|
import org.openqa.selenium.WebElement;
|
||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.openqa.selenium.remote.DesiredCapabilities;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author yihua.huang@dianping.com <br>
|
* @author yihua.huang@dianping.com <br>
|
||||||
|
@ -18,7 +23,16 @@ public class SeleniumTest {
|
||||||
@Test
|
@Test
|
||||||
public void testSelenium() {
|
public void testSelenium() {
|
||||||
System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
|
System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
|
||||||
WebDriver webDriver = new ChromeDriver();
|
Map<String, Object> contentSettings = new HashMap<String, Object>();
|
||||||
|
contentSettings.put("images", 2);
|
||||||
|
|
||||||
|
Map<String, Object> preferences = new HashMap<String, Object>();
|
||||||
|
preferences.put("profile.default_content_settings", contentSettings);
|
||||||
|
|
||||||
|
DesiredCapabilities caps = DesiredCapabilities.chrome();
|
||||||
|
caps.setCapability("chrome.prefs", preferences);
|
||||||
|
caps.setCapability("chrome.switches", Arrays.asList("--user-data-dir=/Users/yihua/temp/chrome"));
|
||||||
|
WebDriver webDriver = new ChromeDriver(caps);
|
||||||
webDriver.get("http://huaban.com/");
|
webDriver.get("http://huaban.com/");
|
||||||
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
||||||
System.out.println(webElement.getAttribute("outerHTML"));
|
System.out.println(webElement.getAttribute("outerHTML"));
|
||||||
|
|
Loading…
Reference in New Issue