complete selenium
parent
86a20eabd9
commit
644a90c2d8
|
@ -2,16 +2,20 @@ package us.codecraft.webmagic.selenium.downloader;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.Cookie;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.selector.Html;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
|
@ -25,24 +29,40 @@ public class SeleniumDownloader implements Downloader {
|
|||
|
||||
/**
 * Creates a downloader that uses a {@link WebDriverPool} built with its no-argument constructor.
 *
 * <p>Side effect: sets the JVM-wide {@code webdriver.chrome.driver} system property.
 *
 * @param chromeDriverPath value stored in the {@code webdriver.chrome.driver} system property
 */
public SeleniumDownloader(String chromeDriverPath) {
    System.setProperty("webdriver.chrome.driver", chromeDriverPath);
    this.webDriverPool = new WebDriverPool();
}
|
||||
|
||||
/**
 * Creates a downloader whose {@link WebDriverPool} is constructed with the given size argument.
 *
 * <p>Side effect: sets the JVM-wide {@code webdriver.chrome.driver} system property.
 *
 * @param chromeDriverPath value stored in the {@code webdriver.chrome.driver} system property
 * @param poolSize         argument forwarded to {@code new WebDriverPool(int)}
 */
public SeleniumDownloader(String chromeDriverPath, int poolSize) {
    System.setProperty("webdriver.chrome.driver", chromeDriverPath);
    this.webDriverPool = new WebDriverPool(poolSize);
}
|
||||
|
||||
@Override
|
||||
public Page download(Request request, Task task) {
|
||||
WebDriver webDriver = null;
|
||||
WebDriver webDriver;
|
||||
try {
|
||||
webDriver = webDriverPool.get();
|
||||
} catch (InterruptedException e) {
|
||||
logger.warn("interrupted",e);
|
||||
logger.warn("interrupted", e);
|
||||
return null;
|
||||
}
|
||||
webDriver.get(request.getUrl());
|
||||
WebDriver.Options manage = webDriver.manage();
|
||||
Site site = task.getSite();
|
||||
if (site.getCookies() != null) {
|
||||
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
|
||||
Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue());
|
||||
manage.addCookie(cookie);
|
||||
}
|
||||
}
|
||||
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
||||
String content = webElement.getAttribute("outerHTML");
|
||||
Page page = new Page();
|
||||
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
||||
page.setUrl(new PlainText(request.getUrl()));
|
||||
page.setRequest(request);
|
||||
webDriverPool.returnToPool(webDriver);
|
||||
return page;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
package us.codecraft.webmagic.selenium.downloader;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午2:46 <br>
|
||||
*/
|
||||
public class SeleniumDownloaderTest {
|
||||
|
||||
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
||||
|
||||
@Ignore("need chrome driver")
|
||||
@Test
|
||||
public void test(){
|
||||
SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
|
||||
Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
|
||||
@Override
|
||||
public String getUUID() {
|
||||
return "huaban.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return Site.me();
|
||||
}
|
||||
});
|
||||
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
|
||||
}
|
||||
}
|
|
@ -10,12 +10,13 @@ import org.openqa.selenium.WebDriver;
|
|||
*/
|
||||
public class WebDriverPoolTest {
|
||||
|
||||
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
||||
|
||||
@Test
|
||||
public void test(){
|
||||
String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
||||
public void test() {
|
||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
||||
WebDriverPool webDriverPool =new WebDriverPool(5);
|
||||
for (int i=0;i<5;i++){
|
||||
WebDriverPool webDriverPool = new WebDriverPool(5);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
try {
|
||||
WebDriver webDriver = webDriverPool.get();
|
||||
System.out.println(i);
|
||||
|
|
Loading…
Reference in New Issue