From 8b7e6a350b79d8e5a9c5fc1962b83540f85833a1 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Tue, 6 Aug 2013 23:11:00 +0800 Subject: [PATCH] add test case back --- .../webmagic/downloader/SeleniumTest.java | 41 +++++++++++++ .../selenium/SeleniumDownloaderTest.java | 61 +++++++++++++++++++ .../selenium/WebDriverPoolTest.java | 31 ++++++++++ .../webmagic/samples/HuabanProcessor.java | 2 +- 4 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/SeleniumTest.java create mode 100644 webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloaderTest.java create mode 100644 webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/WebDriverPoolTest.java diff --git a/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/SeleniumTest.java b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/SeleniumTest.java new file mode 100644 index 0000000..2c19033 --- /dev/null +++ b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/SeleniumTest.java @@ -0,0 +1,41 @@ +package us.codecraft.webmagic.downloader; + +import org.junit.Ignore; +import org.junit.Test; +import org.openqa.selenium.By; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.WebElement; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.remote.DesiredCapabilities; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * @author code4crafter@gmail.com
+ * @date: 13-7-26
+ * Time: 12:27 PM
+ */
+public class SeleniumTest {
+    /** Manual check (ignored by default): starts Chrome through the local chromedriver, loads huaban.com and prints the root element's outerHTML. */
+    @Ignore("need chrome driver")
+    @Test
+    public void testSelenium() {
+        System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
+        Map<String, Object> contentSettings = new HashMap<String, Object>();
+        contentSettings.put("images", 2);
+        // NOTE(review): images=2 above presumably disables image loading -- confirm against Chrome's preference docs.
+        Map<String, Object> preferences = new HashMap<String, Object>();
+        preferences.put("profile.default_content_settings", contentSettings);
+        // Hand the preferences and a dedicated user-data dir to the driver as capabilities.
+        DesiredCapabilities caps = DesiredCapabilities.chrome();
+        caps.setCapability("chrome.prefs", preferences);
+        caps.setCapability("chrome.switches", Arrays.asList("--user-data-dir=/Users/yihua/temp/chrome"));
+        WebDriver webDriver = new ChromeDriver(caps);
+        webDriver.get("http://huaban.com/");
+        WebElement webElement = webDriver.findElement(By.xpath("/html"));
+        System.out.println(webElement.getAttribute("outerHTML"));
+        webDriver.quit(); // quit() ends the whole driver session; close() only closes the current window
+    }
+}
diff --git a/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloaderTest.java b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloaderTest.java
new file mode 100644
index 0000000..fe98e8f
--- /dev/null
+++ b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloaderTest.java
@@ -0,0 +1,61 @@
+package us.codecraft.webmagic.downloader.selenium;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Task;
+
+/**
+ * @author code4crafter@gmail.com
+ * @date: 13-7-26
+ * Time: 2:46 PM
+ */
+public class SeleniumDownloaderTest {
+    // Machine-specific chromedriver location; adjust before enabling these tests.
+    private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
+    /** Manual benchmark: downloads the huaban.com front page 100 times, printing the extracted "pins" links each time and the total elapsed milliseconds at the end. */
+    @Ignore("need chrome driver")
+    @Test
+    public void test() {
+        SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
+        long time1 = System.currentTimeMillis();
+        for (int i = 0; i < 100; i++) {
+            Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
+                @Override
+                public String getUUID() {
+                    return "huaban.com";
+                }
+
+                @Override
+                public Site getSite() {
+                    return Site.me();
+                }
+            });
+            System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
+        }
+        System.out.println(System.currentTimeMillis() - time1);
+    }
+    /** Manual check: downloads one Baidu Wenku document and prints the div.inner content with markup tags and &amp;nbsp; entities removed. */
+    @Ignore
+    @Test
+    public void testBaiduWenku() {
+        SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
+        seleniumDownloader.setSleepTime(10000);
+        // NOTE(review): 10000 is presumably a render wait in milliseconds -- confirm against SeleniumDownloader.
+        Page page = seleniumDownloader.download(new Request("http://wenku.baidu.com/view/462933ff04a1b0717fd5ddc2.html"), new Task() {
+            @Override
+            public String getUUID() {
+                return "huaban.com";
+            }
+
+            @Override
+            public Site getSite() {
+                return Site.me();
+            }
+        });
+        System.out.println(page.getHtml().$("div.inner").replace("<[^<>]+>","").replace("&nbsp;","").all());
+    }
+
+}
diff --git a/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/WebDriverPoolTest.java b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/WebDriverPoolTest.java
new file mode 100644
index 0000000..4d5d275
--- /dev/null
+++ b/webmagic-plugin/webmagic-misc/src/test/java/us/codecraft/webmagic/downloader/selenium/WebDriverPoolTest.java
@@ -0,0 +1,31 @@
+package us.codecraft.webmagic.downloader.selenium;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import org.openqa.selenium.WebDriver;
+
+/**
+ * @author code4crafter@gmail.com
+ * @date: 13-7-26
+ * Time: 2:12 PM
+ */
+public class WebDriverPoolTest {
+
+    private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
+    /** Manual check (ignored by default): takes five drivers from a pool created with argument 5, printing the loop index after each, then closes them all. */
+    @Ignore("need chrome driver")
+    @Test
+    public void test() throws InterruptedException {
+        System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
+        WebDriverPool webDriverPool = new WebDriverPool(5);
+        try {
+            for (int i = 0; i < 5; i++) {
+                WebDriver webDriver = webDriverPool.get();
+                System.out.println(i);
+            }
+        } finally {
+            // Always release the browsers, even when acquiring one fails or is interrupted.
+            webDriverPool.closeAll();
+        }
+    }
+}
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/HuabanProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/HuabanProcessor.java
index eef2b2f..4763c07 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/HuabanProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/HuabanProcessor.java
@@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
 import us.codecraft.webmagic.pipeline.FilePipeline;
 import us.codecraft.webmagic.processor.PageProcessor;
 import us.codecraft.webmagic.scheduler.RedisScheduler;
-import us.codecraft.webmagic.selenium.downloader.SeleniumDownloader;
+import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
 
 /**
  * 花瓣网抽取器。