diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index a568f93..cd092a3 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -2,6 +2,7 @@ package us.codecraft.webmagic; import org.apache.commons.collections.CollectionUtils; import org.apache.log4j.Logger; +import us.codecraft.webmagic.downloader.Destroyable; import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.downloader.HttpClientDownloader; import us.codecraft.webmagic.pipeline.ConsolePipeline; @@ -198,6 +199,22 @@ public class Spider implements Runnable, Task { executorService.shutdown(); } stat.compareAndSet(STAT_RUNNING, STAT_STOPPED); + //release some resources + destroy(); + } + + private void destroy() { + destroyEach(downloader); + destroyEach(pageProcessor); + for (Pipeline pipeline : pipelines) { + destroyEach(pipeline); + } + } + + private void destroyEach(Object object){ + if (object instanceof Destroyable) { + ((Destroyable)object).destroy(); + } } private void processRequest(Request request) { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Destroyable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Destroyable.java new file mode 100644 index 0000000..4f07528 --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Destroyable.java @@ -0,0 +1,13 @@ +package us.codecraft.webmagic.downloader; + +/** + * 比较占用资源的服务可以实现该接口,Spider会在结束时调用destroy()释放资源。
+ * @author yihua.huang@dianping.com
+ * @date: 13-7-26
+ * Time: 3:10 PM
+ */ +public interface Destroyable { + + public void destroy(); + +} diff --git a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java index 171ca44..1ed8b4d 100644 --- a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java +++ b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java @@ -9,6 +9,7 @@ import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; +import us.codecraft.webmagic.downloader.Destroyable; import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.selector.Html; import us.codecraft.webmagic.selector.PlainText; @@ -17,16 +18,22 @@ import us.codecraft.webmagic.utils.UrlUtils; import java.util.Map; /** + * 使用Selenium调用浏览器进行渲染。目前仅支持chrome。
+ * Requires a separately downloaded Selenium driver.
* @author yihua.huang@dianping.com
* @date: 13-7-26
* Time: 下午1:37
*/ -public class SeleniumDownloader implements Downloader { +public class SeleniumDownloader implements Downloader,Destroyable { private WebDriverPool webDriverPool; private Logger logger = Logger.getLogger(getClass()); + /** + * 新建 + * @param chromeDriverPath + */ public SeleniumDownloader(String chromeDriverPath) { System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath); webDriverPool = new WebDriverPool(); @@ -65,4 +72,8 @@ public class SeleniumDownloader implements Downloader { return page; } + @Override + public void destroy() { + webDriverPool.closeAll(); + } } diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java index d50c20a..615ad86 100644 --- a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java +++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java @@ -14,24 +14,27 @@ import us.codecraft.webmagic.Task; */ public class SeleniumDownloaderTest { - private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; + private String chromeDriverPath = ""; @Ignore("need chrome driver") @Test - public void test(){ + public void test() { SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath); + long time1 = System.currentTimeMillis(); + for (int i = 0; i < 100; i++) { + Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() { + @Override + public String getUUID() { + return "huaban.com"; + } - Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() { - @Override - public String getUUID() { - return "huaban.com"; - } - - @Override - public Site getSite() { - return Site.me(); - } - }); - 
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all()); + @Override + public Site getSite() { + return Site.me(); + } + }); + System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all()); + } + System.out.println(System.currentTimeMillis() - time1); } }