release resource
parent
5d55d4d7ff
commit
fe224cbf66
|
@ -2,6 +2,7 @@ package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import us.codecraft.webmagic.downloader.Destroyable;
|
||||||
import us.codecraft.webmagic.downloader.Downloader;
|
import us.codecraft.webmagic.downloader.Downloader;
|
||||||
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||||
|
@ -198,6 +199,22 @@ public class Spider implements Runnable, Task {
|
||||||
executorService.shutdown();
|
executorService.shutdown();
|
||||||
}
|
}
|
||||||
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
||||||
|
//release some resources
|
||||||
|
destroy();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void destroy() {
|
||||||
|
destroyEach(downloader);
|
||||||
|
destroyEach(pageProcessor);
|
||||||
|
for (Pipeline pipeline : pipelines) {
|
||||||
|
destroyEach(pipeline);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void destroyEach(Object object){
|
||||||
|
if (object instanceof Destroyable) {
|
||||||
|
((Destroyable)object).destroy();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processRequest(Request request) {
|
private void processRequest(Request request) {
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Services that hold expensive resources can implement this interface; Spider calls destroy() when it finishes, to release them.<br>
|
||||||
|
* @author yihua.huang@dianping.com <br>
|
||||||
|
* @date: 13-7-26 <br>
|
||||||
|
* Time: 3:10 PM <br>
|
||||||
|
*/
|
||||||
|
public interface Destroyable {
|
||||||
|
|
||||||
|
public void destroy();
|
||||||
|
|
||||||
|
}
|
|
@ -9,6 +9,7 @@ import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
|
import us.codecraft.webmagic.downloader.Destroyable;
|
||||||
import us.codecraft.webmagic.downloader.Downloader;
|
import us.codecraft.webmagic.downloader.Downloader;
|
||||||
import us.codecraft.webmagic.selector.Html;
|
import us.codecraft.webmagic.selector.Html;
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
@ -17,16 +18,22 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Uses Selenium to drive a browser for page rendering. Currently only Chrome is supported.<br>
|
||||||
|
* Requires the Selenium driver to be downloaded separately.<br>
|
||||||
* @author yihua.huang@dianping.com <br>
|
* @author yihua.huang@dianping.com <br>
|
||||||
* @date: 13-7-26 <br>
|
* @date: 13-7-26 <br>
|
||||||
* Time: 1:37 PM <br>
|
* Time: 1:37 PM <br>
|
||||||
*/
|
*/
|
||||||
public class SeleniumDownloader implements Downloader {
|
public class SeleniumDownloader implements Downloader,Destroyable {
|
||||||
|
|
||||||
private WebDriverPool webDriverPool;
|
private WebDriverPool webDriverPool;
|
||||||
|
|
||||||
private Logger logger = Logger.getLogger(getClass());
|
private Logger logger = Logger.getLogger(getClass());
|
||||||
|
|
||||||
|
/**
|
||||||
|
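* Creates a new downloader backed by a WebDriverPool.
|
||||||
|
* @param chromeDriverPath value stored in the "webdriver.chrome.driver" system property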
|
||||||
|
*/
|
||||||
public SeleniumDownloader(String chromeDriverPath) {
|
public SeleniumDownloader(String chromeDriverPath) {
|
||||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
||||||
webDriverPool = new WebDriverPool();
|
webDriverPool = new WebDriverPool();
|
||||||
|
@ -65,4 +72,8 @@ public class SeleniumDownloader implements Downloader {
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void destroy() {
|
||||||
|
webDriverPool.closeAll();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,24 +14,27 @@ import us.codecraft.webmagic.Task;
|
||||||
*/
|
*/
|
||||||
public class SeleniumDownloaderTest {
|
public class SeleniumDownloaderTest {
|
||||||
|
|
||||||
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
private String chromeDriverPath = "";
|
||||||
|
|
||||||
@Ignore("need chrome driver")
|
@Ignore("need chrome driver")
|
||||||
@Test
|
@Test
|
||||||
public void test(){
|
public void test() {
|
||||||
SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
|
SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
|
||||||
|
long time1 = System.currentTimeMillis();
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
|
||||||
|
@Override
|
||||||
|
public String getUUID() {
|
||||||
|
return "huaban.com";
|
||||||
|
}
|
||||||
|
|
||||||
Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
|
@Override
|
||||||
@Override
|
public Site getSite() {
|
||||||
public String getUUID() {
|
return Site.me();
|
||||||
return "huaban.com";
|
}
|
||||||
}
|
});
|
||||||
|
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
|
||||||
@Override
|
}
|
||||||
public Site getSite() {
|
System.out.println(System.currentTimeMillis() - time1);
|
||||||
return Site.me();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue