comments
parent
3a666fcebf
commit
17e95f2a7f
|
@ -10,7 +10,7 @@ import java.util.concurrent.locks.ReentrantLock;
|
|||
* Thread pool for workers.<br></br>
|
||||
* Uses {@link java.util.concurrent.ExecutorService} as the inner implementation. <br></br>
|
||||
* New features: <br></br>
|
||||
* 1. Block when thread pool is full to avoid poll many urls but not process. <br></br>
|
||||
* 1. Blocks when the thread pool is full, to avoid polling many URLs without processing them. <br></br>
|
||||
* 2. Counts the threads that are alive, for monitoring.
|
||||
*
|
||||
* @author code4crafter@gmail.com
|
||||
|
|
|
@ -3,12 +3,14 @@ package us.codecraft.webmagic.configurable;
|
|||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.utils.Experimental;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br>
|
||||
*/
|
||||
@Experimental
|
||||
public class ConfigurablePageProcessor implements PageProcessor {
|
||||
|
||||
private Site site;
|
||||
|
|
|
@ -5,8 +5,7 @@ import org.slf4j.LoggerFactory;
|
|||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.SpiderListener;
|
||||
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
|
||||
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
|
||||
import us.codecraft.webmagic.utils.Experimental;
|
||||
import us.codecraft.webmagic.utils.IPUtils;
|
||||
|
||||
import javax.management.JMException;
|
||||
|
@ -30,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
* @author code4crafter@gmail.com
|
||||
* @since 0.5.0
|
||||
*/
|
||||
@Experimental
|
||||
public class SpiderMonitor {
|
||||
|
||||
private enum Type {
|
||||
|
@ -226,22 +226,4 @@ public class SpiderMonitor {
|
|||
return this;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
|
||||
.addUrl("http://my.oschina.net/flashsword/blog").thread(2);
|
||||
Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
|
||||
.addUrl("https://github.com/code4craft");
|
||||
|
||||
SpiderMonitor spiderMonitor = new SpiderMonitor();
|
||||
spiderMonitor.register(oschinaSpider, githubSpider);
|
||||
// To connect to the monitor remotely, use spiderMonitor.server().jmxStart();
|
||||
// Only ONE server can be started per machine.
|
||||
//Others will be registered
|
||||
spiderMonitor.server().jmxStart();
|
||||
oschinaSpider.start();
|
||||
githubSpider.thread(10).start();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue