release resource
parent
5d55d4d7ff
commit
fe224cbf66
|
@ -2,6 +2,7 @@ package us.codecraft.webmagic;
|
|||
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import us.codecraft.webmagic.downloader.Destroyable;
|
||||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||
|
@ -198,6 +199,22 @@ public class Spider implements Runnable, Task {
|
|||
executorService.shutdown();
|
||||
}
|
||||
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
||||
//release some resources
|
||||
destroy();
|
||||
}
|
||||
|
||||
private void destroy() {
|
||||
destroyEach(downloader);
|
||||
destroyEach(pageProcessor);
|
||||
for (Pipeline pipeline : pipelines) {
|
||||
destroyEach(pipeline);
|
||||
}
|
||||
}
|
||||
|
||||
private void destroyEach(Object object){
|
||||
if (object instanceof Destroyable) {
|
||||
((Destroyable)object).destroy();
|
||||
}
|
||||
}
|
||||
|
||||
private void processRequest(Request request) {
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
package us.codecraft.webmagic.downloader;
|
||||
|
||||
/**
 * Services that hold significant resources can implement this interface;
 * Spider calls {@link #destroy()} when it finishes so the resources can be released.<br>
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 3:10 PM <br>
 */
public interface Destroyable {

    /**
     * Releases the resources held by this object.
     */
    void destroy();

}
|
|
@ -9,6 +9,7 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.downloader.Destroyable;
|
||||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.selector.Html;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
|
@ -17,16 +18,22 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br>
|
||||
* 需要下载Selenium driver支持。<br>
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午1:37 <br>
|
||||
*/
|
||||
public class SeleniumDownloader implements Downloader {
|
||||
public class SeleniumDownloader implements Downloader,Destroyable {
|
||||
|
||||
private WebDriverPool webDriverPool;
|
||||
|
||||
private Logger logger = Logger.getLogger(getClass());
|
||||
|
||||
/**
|
||||
* 新建
|
||||
* @param chromeDriverPath
|
||||
*/
|
||||
public SeleniumDownloader(String chromeDriverPath) {
|
||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
||||
webDriverPool = new WebDriverPool();
|
||||
|
@ -65,4 +72,8 @@ public class SeleniumDownloader implements Downloader {
|
|||
return page;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
webDriverPool.closeAll();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,24 +14,27 @@ import us.codecraft.webmagic.Task;
|
|||
*/
|
||||
public class SeleniumDownloaderTest {
|
||||
|
||||
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
||||
private String chromeDriverPath = "";
|
||||
|
||||
@Ignore("need chrome driver")
|
||||
@Test
|
||||
public void test(){
|
||||
public void test() {
|
||||
SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
|
||||
long time1 = System.currentTimeMillis();
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
|
||||
@Override
|
||||
public String getUUID() {
|
||||
return "huaban.com";
|
||||
}
|
||||
|
||||
Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
|
||||
@Override
|
||||
public String getUUID() {
|
||||
return "huaban.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return Site.me();
|
||||
}
|
||||
});
|
||||
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return Site.me();
|
||||
}
|
||||
});
|
||||
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
|
||||
}
|
||||
System.out.println(System.currentTimeMillis() - time1);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue