diff --git a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
index 8fd1c6a..171ca44 100644
--- a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
+++ b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
@@ -2,16 +2,20 @@ package us.codecraft.webmagic.selenium.downloader;
import org.apache.log4j.Logger;
import org.openqa.selenium.By;
+import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;
+import java.util.Map;
+
/**
* @author yihua.huang@dianping.com
* @date: 13-7-26
@@ -25,24 +29,54 @@ public class SeleniumDownloader implements Downloader {
public SeleniumDownloader(String chromeDriverPath) {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
+ webDriverPool = new WebDriverPool();
+ }
+
+ public SeleniumDownloader(String chromeDriverPath, int poolSize) {
+ System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
+ webDriverPool = new WebDriverPool(poolSize);
}
+ /**
+ * Loads the request URL in a pooled WebDriver and wraps the outerHTML of the /html element in a Page.
+ * The driver is handed back to the pool whether or not the download succeeds.
+ *
+ * @return the downloaded page, or null if interrupted while waiting for a driver
+ */
@Override
public Page download(Request request, Task task) {
- WebDriver webDriver = null;
+ WebDriver webDriver;
try {
webDriver = webDriverPool.get();
} catch (InterruptedException e) {
- logger.warn("interrupted",e);
+ logger.warn("interrupted", e);
+ // Restore the interrupt flag so the calling thread can still observe the interruption.
+ Thread.currentThread().interrupt();
return null;
}
+ try {
webDriver.get(request.getUrl());
+ WebDriver.Options manage = webDriver.manage();
+ Site site = task.getSite();
+ if (site.getCookies() != null) {
+ // NOTE(review): cookies are added after navigation, so they presumably only affect later requests on this driver - confirm intent.
+ // Typed entry: Cookie is built from String name/value; a raw Map.Entry would yield Object here.
+ for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
+ Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue());
+ manage.addCookie(cookie);
+ }
+ }
WebElement webElement = webDriver.findElement(By.xpath("/html"));
String content = webElement.getAttribute("outerHTML");
Page page = new Page();
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request);
return page;
+ } finally {
+ // Always hand the driver back, even if navigation or the DOM lookup throws.
+ webDriverPool.returnToPool(webDriver);
+ }
}
+
}
diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java
new file mode 100644
index 0000000..d210684
--- /dev/null
+++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java
@@ -0,0 +1,36 @@
+package us.codecraft.webmagic.selenium.downloader;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Task;
+
+/**
+ * @author yihua.huang@dianping.com
+ * @date: 13-7-26
+ * Time: 2:46 PM
+ */
+public class SeleniumDownloaderTest { // smoke test for SeleniumDownloader; ignored by default because it needs a chrome driver
+
+ private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; // machine-specific chromedriver location; adjust for the local environment
+
+ @Ignore("need chrome driver")
+ @Test
+ public void test(){
+ SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
+ Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() { // anonymous Task stub supplying a fixed UUID and Site.me()
+ @Override
+ public String getUUID() {
+ return "huaban.com";
+ }
+
+ @Override
+ public Site getSite() {
+ return Site.me();
+ }
+ });
+ System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all()); // prints the selector-chain result; nothing is asserted
+ }
+}
diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java
index d38216f..38e4f86 100644
--- a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java
+++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java
@@ -10,12 +10,13 @@ import org.openqa.selenium.WebDriver;
*/
public class WebDriverPoolTest {
+ private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
+
@Test
- public void test(){
- String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
+ public void test() {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
- WebDriverPool webDriverPool =new WebDriverPool(5);
- for (int i=0;i<5;i++){
+ WebDriverPool webDriverPool = new WebDriverPool(5);
+ for (int i = 0; i < 5; i++) {
try {
WebDriver webDriver = webDriverPool.get();
System.out.println(i);