Add PhantomJS support for selenium
The configuration file is config.ini. The dependencies are updated in pom.xml. SeleniumDownloader and WebDriverPool are updated to support PhantomJS. NOTE: The versions of GhostDriver, Selenium, and PhantomJS are stable and validated. A Google Play example is in the samples package: GooglePlayProcessor.java
master
parent
b30ca6ce1e
commit
d3bbece202
|
@ -0,0 +1,12 @@
|
||||||
|
# What WebDriver to use for the tests
|
||||||
|
driver=phantomjs
|
||||||
|
#driver=firefox
|
||||||
|
#driver=chrome
|
||||||
|
#driver=http://localhost:8910
|
||||||
|
#driver=http://localhost:4444/wd/hub
|
||||||
|
|
||||||
|
# PhantomJS specific config (change according to your installation)
|
||||||
|
#phantomjs_exec_path=/Users/Bingo/bin/phantomjs-qt5
|
||||||
|
phantomjs_exec_path=/Users/Bingo/Downloads/phantomjs-1.9.8-macosx/bin/phantomjs
|
||||||
|
#phantomjs_driver_path=/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/src/main.js
|
||||||
|
phantomjs_driver_loglevel=DEBUG
|
|
@ -1,40 +1,50 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
<parent>
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<parent>
|
||||||
<groupId>us.codecraft</groupId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<version>0.5.2</version>
|
<groupId>us.codecraft</groupId>
|
||||||
</parent>
|
<version>0.5.2</version>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<artifactId>webmagic-selenium</artifactId>
|
<artifactId>webmagic-selenium</artifactId>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.seleniumhq.selenium</groupId>
|
<groupId>org.seleniumhq.selenium</groupId>
|
||||||
<artifactId>selenium-java</artifactId>
|
<artifactId>selenium-java</artifactId>
|
||||||
<version>2.33.0</version>
|
<version>2.34.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>webmagic-core</artifactId>
|
<artifactId>webmagic-core</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<!-- <dependency> <groupId>com.github.detro</groupId> <artifactId>phantomjsdriver</artifactId>
|
||||||
<groupId>junit</groupId>
|
<version>1.2.0</version> </dependency> -->
|
||||||
<artifactId>junit</artifactId>
|
<dependency>
|
||||||
</dependency>
|
<groupId>com.github.detro.ghostdriver</groupId>
|
||||||
</dependencies>
|
<artifactId>phantomjsdriver</artifactId>
|
||||||
|
<version>1.1.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<build>
|
|
||||||
<plugins>
|
<dependency>
|
||||||
<plugin>
|
<groupId>junit</groupId>
|
||||||
<artifactId>maven-deploy-plugin</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<configuration>
|
</dependency>
|
||||||
<skip>true</skip>
|
</dependencies>
|
||||||
</configuration>
|
|
||||||
</plugin>
|
<build>
|
||||||
</plugins>
|
<plugins>
|
||||||
</build>
|
<plugin>
|
||||||
|
<artifactId>maven-deploy-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<skip>true</skip>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -5,6 +5,7 @@ import org.openqa.selenium.By;
|
||||||
import org.openqa.selenium.Cookie;
|
import org.openqa.selenium.Cookie;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.WebElement;
|
import org.openqa.selenium.WebElement;
|
||||||
|
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
@ -23,90 +24,113 @@ import java.util.Map;
|
||||||
* 需要下载Selenium driver支持。<br>
|
* 需要下载Selenium driver支持。<br>
|
||||||
*
|
*
|
||||||
* @author code4crafter@gmail.com <br>
|
* @author code4crafter@gmail.com <br>
|
||||||
* Date: 13-7-26 <br>
|
* Date: 13-7-26 <br>
|
||||||
* Time: 下午1:37 <br>
|
* Time: 下午1:37 <br>
|
||||||
*/
|
*/
|
||||||
public class SeleniumDownloader implements Downloader, Closeable {
|
public class SeleniumDownloader implements Downloader, Closeable {
|
||||||
|
|
||||||
private volatile WebDriverPool webDriverPool;
|
private volatile WebDriverPool webDriverPool;
|
||||||
|
|
||||||
private Logger logger = Logger.getLogger(getClass());
|
private Logger logger = Logger.getLogger(getClass());
|
||||||
|
|
||||||
private int sleepTime = 0;
|
private int sleepTime = 0;
|
||||||
|
|
||||||
private int poolSize = 1;
|
private int poolSize = 1;
|
||||||
|
|
||||||
/**
|
private static final String DRIVER_PHANTOMJS = "phantomjs";
|
||||||
* 新建
|
|
||||||
*
|
|
||||||
* @param chromeDriverPath
|
|
||||||
*/
|
|
||||||
public SeleniumDownloader(String chromeDriverPath) {
|
|
||||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* set sleep time to wait until load success
|
* 新建
|
||||||
*
|
*
|
||||||
* @param sleepTime
|
* @param chromeDriverPath
|
||||||
* @return this
|
*/
|
||||||
*/
|
public SeleniumDownloader(String chromeDriverPath) {
|
||||||
public SeleniumDownloader setSleepTime(int sleepTime) {
|
System.getProperties().setProperty("webdriver.chrome.driver",
|
||||||
this.sleepTime = sleepTime;
|
chromeDriverPath);
|
||||||
return this;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public Page download(Request request, Task task) {
|
* Constructor without any field. Constructs a PhantomJS browser
|
||||||
checkInit();
|
*
|
||||||
WebDriver webDriver;
|
* @author bob.li.0718@gmail.com
|
||||||
try {
|
*/
|
||||||
webDriver = webDriverPool.get();
|
public SeleniumDownloader() {
|
||||||
} catch (InterruptedException e) {
|
// System.setProperty("phantomjs.binary.path",
|
||||||
logger.warn("interrupted", e);
|
// "/Users/Bingo/Downloads/phantomjs-1.9.7-macosx/bin/phantomjs");
|
||||||
return null;
|
}
|
||||||
}
|
|
||||||
logger.info("downloading page " + request.getUrl());
|
|
||||||
webDriver.get(request.getUrl());
|
|
||||||
try {
|
|
||||||
Thread.sleep(sleepTime);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
WebDriver.Options manage = webDriver.manage();
|
|
||||||
Site site = task.getSite();
|
|
||||||
if (site.getCookies() != null) {
|
|
||||||
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
|
|
||||||
Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue());
|
|
||||||
manage.addCookie(cookie);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
|
||||||
String content = webElement.getAttribute("outerHTML");
|
|
||||||
Page page = new Page();
|
|
||||||
page.setRawText(content);
|
|
||||||
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
|
||||||
page.setUrl(new PlainText(request.getUrl()));
|
|
||||||
page.setRequest(request);
|
|
||||||
webDriverPool.returnToPool(webDriver);
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkInit() {
|
/**
|
||||||
if (webDriverPool == null) {
|
* set sleep time to wait until load success
|
||||||
synchronized (this){
|
*
|
||||||
webDriverPool = new WebDriverPool(poolSize);
|
* @param sleepTime
|
||||||
}
|
* @return this
|
||||||
}
|
*/
|
||||||
}
|
public SeleniumDownloader setSleepTime(int sleepTime) {
|
||||||
|
this.sleepTime = sleepTime;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setThread(int thread) {
|
public Page download(Request request, Task task) {
|
||||||
this.poolSize = thread;
|
checkInit();
|
||||||
}
|
WebDriver webDriver;
|
||||||
|
try {
|
||||||
|
webDriver = webDriverPool.get();
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
logger.warn("interrupted", e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
logger.info("downloading page " + request.getUrl());
|
||||||
|
webDriver.get(request.getUrl());
|
||||||
|
try {
|
||||||
|
Thread.sleep(sleepTime);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
WebDriver.Options manage = webDriver.manage();
|
||||||
|
Site site = task.getSite();
|
||||||
|
if (site.getCookies() != null) {
|
||||||
|
for (Map.Entry<String, String> cookieEntry : site.getCookies()
|
||||||
|
.entrySet()) {
|
||||||
|
Cookie cookie = new Cookie(cookieEntry.getKey(),
|
||||||
|
cookieEntry.getValue());
|
||||||
|
manage.addCookie(cookie);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
/*
|
||||||
public void close() throws IOException {
|
* TODO You can add mouse event or other processes
|
||||||
webDriverPool.closeAll();
|
*
|
||||||
}
|
* @author: bob.li.0718@gmail.com
|
||||||
|
*/
|
||||||
|
|
||||||
|
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
||||||
|
String content = webElement.getAttribute("outerHTML");
|
||||||
|
Page page = new Page();
|
||||||
|
page.setRawText(content);
|
||||||
|
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content,
|
||||||
|
request.getUrl())));
|
||||||
|
page.setUrl(new PlainText(request.getUrl()));
|
||||||
|
page.setRequest(request);
|
||||||
|
webDriverPool.returnToPool(webDriver);
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkInit() {
|
||||||
|
if (webDriverPool == null) {
|
||||||
|
synchronized (this) {
|
||||||
|
webDriverPool = new WebDriverPool(poolSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setThread(int thread) {
|
||||||
|
this.poolSize = thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
webDriverPool.closeAll();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,89 +3,231 @@ package us.codecraft.webmagic.downloader.selenium;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.openqa.selenium.firefox.FirefoxDriver;
|
||||||
|
import org.openqa.selenium.phantomjs.PhantomJSDriver;
|
||||||
|
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
|
||||||
|
import org.openqa.selenium.remote.DesiredCapabilities;
|
||||||
|
import org.openqa.selenium.remote.RemoteWebDriver;
|
||||||
|
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
import java.util.concurrent.BlockingDeque;
|
import java.util.concurrent.BlockingDeque;
|
||||||
import java.util.concurrent.LinkedBlockingDeque;
|
import java.util.concurrent.LinkedBlockingDeque;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com <br>
|
* @author code4crafter@gmail.com <br>
|
||||||
* Date: 13-7-26 <br>
|
* Date: 13-7-26 <br>
|
||||||
* Time: 下午1:41 <br>
|
* Time: 下午1:41 <br>
|
||||||
*/
|
*/
|
||||||
class WebDriverPool {
|
class WebDriverPool {
|
||||||
private Logger logger = Logger.getLogger(getClass());
|
private Logger logger = Logger.getLogger(getClass());
|
||||||
|
|
||||||
private final static int DEFAULT_CAPACITY = 5;
|
private final static int DEFAULT_CAPACITY = 5;
|
||||||
|
|
||||||
private final int capacity;
|
private final int capacity;
|
||||||
|
|
||||||
private final static int STAT_RUNNING = 1;
|
private final static int STAT_RUNNING = 1;
|
||||||
|
|
||||||
private final static int STAT_CLODED = 2;
|
private final static int STAT_CLODED = 2;
|
||||||
|
|
||||||
private AtomicInteger stat = new AtomicInteger(STAT_RUNNING);
|
private AtomicInteger stat = new AtomicInteger(STAT_RUNNING);
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* store webDrivers created
|
* new fields for configuring phantomJS
|
||||||
*/
|
*/
|
||||||
private List<WebDriver> webDriverList = Collections.synchronizedList(new ArrayList<WebDriver>());
|
private WebDriver mDriver = null;
|
||||||
|
private boolean mAutoQuitDriver = true;
|
||||||
|
|
||||||
/**
|
private static final String CONFIG_FILE = "/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/config.ini";
|
||||||
* store webDrivers available
|
private static final String DRIVER_FIREFOX = "firefox";
|
||||||
*/
|
private static final String DRIVER_CHROME = "chrome";
|
||||||
private BlockingDeque<WebDriver> innerQueue = new LinkedBlockingDeque<WebDriver>();
|
private static final String DRIVER_PHANTOMJS = "phantomjs";
|
||||||
|
|
||||||
public WebDriverPool(int capacity) {
|
protected static Properties sConfig;
|
||||||
this.capacity = capacity;
|
protected static DesiredCapabilities sCaps;
|
||||||
}
|
|
||||||
|
|
||||||
public WebDriverPool() {
|
/**
|
||||||
this(DEFAULT_CAPACITY);
|
* Configure the GhostDriver, and initialize a WebDriver instance. This part
|
||||||
}
|
* of code comes from GhostDriver.
|
||||||
|
* https://github.com/detro/ghostdriver/tree/master/test/java/src/test/java/ghostdriver
|
||||||
|
*
|
||||||
|
* @author bob.li.0718@gmail.com
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void configure() throws IOException {
|
||||||
|
// Read config file
|
||||||
|
sConfig = new Properties();
|
||||||
|
sConfig.load(new FileReader(CONFIG_FILE));
|
||||||
|
|
||||||
public WebDriver get() throws InterruptedException {
|
// Prepare capabilities
|
||||||
checkRunning();
|
sCaps = new DesiredCapabilities();
|
||||||
WebDriver poll = innerQueue.poll();
|
sCaps.setJavascriptEnabled(true);
|
||||||
if (poll != null) {
|
sCaps.setCapability("takesScreenshot", false);
|
||||||
return poll;
|
|
||||||
}
|
|
||||||
if (webDriverList.size() < capacity) {
|
|
||||||
synchronized (webDriverList) {
|
|
||||||
if (webDriverList.size() < capacity) {
|
|
||||||
ChromeDriver e = new ChromeDriver();
|
|
||||||
innerQueue.add(e);
|
|
||||||
webDriverList.add(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
String driver = sConfig.getProperty("driver", DRIVER_PHANTOMJS);
|
||||||
return innerQueue.take();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void returnToPool(WebDriver webDriver) {
|
// Fetch PhantomJS-specific configuration parameters
|
||||||
checkRunning();
|
if (driver.equals(DRIVER_PHANTOMJS)) {
|
||||||
innerQueue.add(webDriver);
|
// "phantomjs_exec_path"
|
||||||
}
|
if (sConfig.getProperty("phantomjs_exec_path") != null) {
|
||||||
|
sCaps.setCapability(
|
||||||
|
PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY,
|
||||||
|
sConfig.getProperty("phantomjs_exec_path"));
|
||||||
|
} else {
|
||||||
|
throw new IOException(
|
||||||
|
String.format(
|
||||||
|
"Property '%s' not set!",
|
||||||
|
PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY));
|
||||||
|
}
|
||||||
|
// "phantomjs_driver_path"
|
||||||
|
if (sConfig.getProperty("phantomjs_driver_path") != null) {
|
||||||
|
System.out.println("Test will use an external GhostDriver");
|
||||||
|
sCaps.setCapability(
|
||||||
|
PhantomJSDriverService.PHANTOMJS_GHOSTDRIVER_PATH_PROPERTY,
|
||||||
|
sConfig.getProperty("phantomjs_driver_path"));
|
||||||
|
} else {
|
||||||
|
System.out
|
||||||
|
.println("Test will use PhantomJS internal GhostDriver");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected void checkRunning() {
|
// Disable "web-security", enable all possible "ssl-protocols" and
|
||||||
if (!stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
// "ignore-ssl-errors" for PhantomJSDriver
|
||||||
throw new IllegalStateException("Already closed!");
|
// sCaps.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS, new
|
||||||
}
|
// String[] {
|
||||||
}
|
// "--web-security=false",
|
||||||
|
// "--ssl-protocol=any",
|
||||||
|
// "--ignore-ssl-errors=true"
|
||||||
|
// });
|
||||||
|
|
||||||
public void closeAll() {
|
ArrayList<String> cliArgsCap = new ArrayList<String>();
|
||||||
boolean b = stat.compareAndSet(STAT_RUNNING, STAT_CLODED);
|
cliArgsCap.add("--web-security=false");
|
||||||
if (!b) {
|
cliArgsCap.add("--ssl-protocol=any");
|
||||||
throw new IllegalStateException("Already closed!");
|
cliArgsCap.add("--ignore-ssl-errors=true");
|
||||||
}
|
sCaps.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS,
|
||||||
for (WebDriver webDriver : webDriverList) {
|
cliArgsCap);
|
||||||
logger.info("Quit webDriver" + webDriver);
|
|
||||||
webDriver.quit();
|
// Control LogLevel for GhostDriver, via CLI arguments
|
||||||
}
|
sCaps.setCapability(
|
||||||
}
|
PhantomJSDriverService.PHANTOMJS_GHOSTDRIVER_CLI_ARGS,
|
||||||
|
new String[] { "--logLevel="
|
||||||
|
+ (sConfig.getProperty("phantomjs_driver_loglevel") != null ? sConfig
|
||||||
|
.getProperty("phantomjs_driver_loglevel")
|
||||||
|
: "INFO") });
|
||||||
|
|
||||||
|
// String driver = sConfig.getProperty("driver", DRIVER_PHANTOMJS);
|
||||||
|
|
||||||
|
// Start appropriate Driver
|
||||||
|
if (isUrl(driver)) {
|
||||||
|
sCaps.setBrowserName("phantomjs");
|
||||||
|
mDriver = new RemoteWebDriver(new URL(driver), sCaps);
|
||||||
|
} else if (driver.equals(DRIVER_FIREFOX)) {
|
||||||
|
mDriver = new FirefoxDriver(sCaps);
|
||||||
|
} else if (driver.equals(DRIVER_CHROME)) {
|
||||||
|
mDriver = new ChromeDriver(sCaps);
|
||||||
|
} else if (driver.equals(DRIVER_PHANTOMJS)) {
|
||||||
|
mDriver = new PhantomJSDriver(sCaps);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* check whether input is a valid URL
|
||||||
|
*
|
||||||
|
* @author bob.li.0718@gmail.com
|
||||||
|
* @param urlString
|
||||||
|
* @return true means yes, otherwise no.
|
||||||
|
*/
|
||||||
|
private boolean isUrl(String urlString) {
|
||||||
|
try {
|
||||||
|
new URL(urlString);
|
||||||
|
return true;
|
||||||
|
} catch (MalformedURLException mue) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* store webDrivers created
|
||||||
|
*/
|
||||||
|
private List<WebDriver> webDriverList = Collections
|
||||||
|
.synchronizedList(new ArrayList<WebDriver>());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* store webDrivers available
|
||||||
|
*/
|
||||||
|
private BlockingDeque<WebDriver> innerQueue = new LinkedBlockingDeque<WebDriver>();
|
||||||
|
|
||||||
|
/**
 * Creates a pool that will create at most {@code capacity} drivers.
 *
 * @param capacity
 *            maximum number of drivers; must be positive
 * @throws IllegalArgumentException
 *             if {@code capacity} is zero or negative
 */
public WebDriverPool(int capacity) {
    // With a non-positive capacity get() would never create a driver and
    // would then wait forever for one to be returned.
    if (capacity <= 0) {
        throw new IllegalArgumentException("capacity must be positive: " + capacity);
    }
    this.capacity = capacity;
}

/**
 * Creates a pool with the default capacity ({@code DEFAULT_CAPACITY}).
 */
public WebDriverPool() {
    this(DEFAULT_CAPACITY);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
public WebDriver get() throws InterruptedException {
|
||||||
|
checkRunning();
|
||||||
|
WebDriver poll = innerQueue.poll();
|
||||||
|
if (poll != null) {
|
||||||
|
return poll;
|
||||||
|
}
|
||||||
|
if (webDriverList.size() < capacity) {
|
||||||
|
synchronized (webDriverList) {
|
||||||
|
if (webDriverList.size() < capacity) {
|
||||||
|
|
||||||
|
// add new WebDriver instance into pool
|
||||||
|
try {
|
||||||
|
configure();
|
||||||
|
innerQueue.add(mDriver);
|
||||||
|
webDriverList.add(mDriver);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ChromeDriver e = new ChromeDriver();
|
||||||
|
// WebDriver e = getWebDriver();
|
||||||
|
// innerQueue.add(e);
|
||||||
|
// webDriverList.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return innerQueue.take();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void returnToPool(WebDriver webDriver) {
|
||||||
|
checkRunning();
|
||||||
|
innerQueue.add(webDriver);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void checkRunning() {
|
||||||
|
if (!stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
||||||
|
throw new IllegalStateException("Already closed!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void closeAll() {
|
||||||
|
boolean b = stat.compareAndSet(STAT_RUNNING, STAT_CLODED);
|
||||||
|
if (!b) {
|
||||||
|
throw new IllegalStateException("Already closed!");
|
||||||
|
}
|
||||||
|
for (WebDriver webDriver : webDriverList) {
|
||||||
|
logger.info("Quit webDriver" + webDriver);
|
||||||
|
webDriver.quit();
|
||||||
|
webDriver = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.Page;
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.Spider;
|
||||||
|
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
|
||||||
|
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||||
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* Using Selenium with PhantomJS to fetch web-page with JS<br>
|
||||||
|
*
|
||||||
|
* @author bob.li.0718@gmail.com <br>
|
||||||
|
* Date: 15-7-11 <br>
|
||||||
|
*/
|
||||||
|
public class GooglePlayProcessor implements PageProcessor {
|
||||||
|
|
||||||
|
private Site site;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(Page page) {
|
||||||
|
|
||||||
|
page.putField("whole-html", page.getHtml().toString());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Site getSite() {
|
||||||
|
if (null == site) {
|
||||||
|
site = Site.me().setDomain("play.google.com").setSleepTime(300);
|
||||||
|
}
|
||||||
|
return site;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
Spider.create(new GooglePlayProcessor())
|
||||||
|
.thread(5)
|
||||||
|
.addPipeline(
|
||||||
|
new FilePipeline(
|
||||||
|
"/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/data/"))
|
||||||
|
.setDownloader(new SeleniumDownloader())
|
||||||
|
.addUrl("https://play.google.com/store/apps/details?id=com.tencent.mm")
|
||||||
|
.runAsync();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue