fix a thread pool exception
parent
3c79d031bd
commit
fba330872b
|
@ -21,21 +21,26 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Entrance of a crawler.<br>
|
* Entrance of a crawler.<br>
|
||||||
* A spider contains four modules: Downloader, Scheduler, PageProcessor and Pipeline.<br>
|
* A spider contains four modules: Downloader, Scheduler, PageProcessor and
|
||||||
|
* Pipeline.<br>
|
||||||
* Every module is a field of Spider. <br>
|
* Every module is a field of Spider. <br>
|
||||||
* The modules are defined in interface. <br>
|
* The modules are defined in interface. <br>
|
||||||
* You can customize a spider with various implementations of them. <br>
|
* You can customize a spider with various implementations of them. <br>
|
||||||
* Examples: <br>
|
* Examples: <br>
|
||||||
* <br>
|
* <br>
|
||||||
* A simple crawler: <br>
|
* A simple crawler: <br>
|
||||||
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")).run();<br>
|
* Spider.create(new SimplePageProcessor("http://my.oschina.net/",
|
||||||
|
* "http://my.oschina.net/*blog/*")).run();<br>
|
||||||
* <br>
|
* <br>
|
||||||
* Store results to files by FilePipeline: <br>
|
* Store results to files by FilePipeline: <br>
|
||||||
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) <br>
|
* Spider.create(new SimplePageProcessor("http://my.oschina.net/",
|
||||||
|
* "http://my.oschina.net/*blog/*")) <br>
|
||||||
* .pipeline(new FilePipeline("/data/temp/webmagic/")).run(); <br>
|
* .pipeline(new FilePipeline("/data/temp/webmagic/")).run(); <br>
|
||||||
* <br>
|
* <br>
|
||||||
* Use FileCacheQueueScheduler to store urls and cursor in files, so that a Spider can resume the status when shutdown. <br>
|
* Use FileCacheQueueScheduler to store urls and cursor in files, so that a
|
||||||
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) <br>
|
* Spider can resume the status when shutdown. <br>
|
||||||
|
* Spider.create(new SimplePageProcessor("http://my.oschina.net/",
|
||||||
|
* "http://my.oschina.net/*blog/*")) <br>
|
||||||
* .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run(); <br>
|
* .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run(); <br>
|
||||||
*
|
*
|
||||||
* @author code4crafter@gmail.com <br>
|
* @author code4crafter@gmail.com <br>
|
||||||
|
@ -221,8 +226,7 @@ public class Spider implements Runnable, Task {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
if (!stat.compareAndSet(STAT_INIT, STAT_RUNNING)
|
if (!stat.compareAndSet(STAT_INIT, STAT_RUNNING) && !stat.compareAndSet(STAT_STOPPED, STAT_RUNNING)) {
|
||||||
&& !stat.compareAndSet(STAT_STOPPED, STAT_RUNNING)) {
|
|
||||||
throw new IllegalStateException("Spider is already running!");
|
throw new IllegalStateException("Spider is already running!");
|
||||||
}
|
}
|
||||||
checkComponent();
|
checkComponent();
|
||||||
|
@ -233,7 +237,8 @@ public class Spider implements Runnable, Task {
|
||||||
startUrls.clear();
|
startUrls.clear();
|
||||||
}
|
}
|
||||||
Request request = scheduler.poll(this);
|
Request request = scheduler.poll(this);
|
||||||
//single thread
|
logger.info("Spider " + getUUID() + " started!");
|
||||||
|
// single thread
|
||||||
if (threadNum <= 1) {
|
if (threadNum <= 1) {
|
||||||
while (request != null && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
while (request != null && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
||||||
processRequest(request);
|
processRequest(request);
|
||||||
|
@ -243,11 +248,12 @@ public class Spider implements Runnable, Task {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
this.executorService = ThreadUtils.newFixedThreadPool(threadNum);
|
this.executorService = ThreadUtils.newFixedThreadPool(threadNum);
|
||||||
}
|
}
|
||||||
//multi thread
|
// multi thread
|
||||||
final AtomicInteger threadAlive = new AtomicInteger(0);
|
final AtomicInteger threadAlive = new AtomicInteger(0);
|
||||||
while (true && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
while (true && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
||||||
if (request == null) {
|
if (request == null) {
|
||||||
//when no request found but some thread is alive, sleep a while.
|
// when no request found but some thread is alive, sleep a
|
||||||
|
// while.
|
||||||
try {
|
try {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -274,7 +280,7 @@ public class Spider implements Runnable, Task {
|
||||||
executorService.shutdown();
|
executorService.shutdown();
|
||||||
}
|
}
|
||||||
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
||||||
//release some resources
|
// release some resources
|
||||||
destroy();
|
destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,7 +305,8 @@ public class Spider implements Runnable, Task {
|
||||||
/**
|
/**
|
||||||
* Process specific urls without url discovering.
|
* Process specific urls without url discovering.
|
||||||
*
|
*
|
||||||
* @param urls urls to process
|
* @param urls
|
||||||
|
* urls to process
|
||||||
*/
|
*/
|
||||||
public void test(String... urls) {
|
public void test(String... urls) {
|
||||||
checkComponent();
|
checkComponent();
|
||||||
|
@ -316,7 +323,7 @@ public class Spider implements Runnable, Task {
|
||||||
sleep(site.getSleepTime());
|
sleep(site.getSleepTime());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
//for cycle retry
|
// for cycle retry
|
||||||
if (page.getHtml() == null) {
|
if (page.getHtml() == null) {
|
||||||
addRequest(page);
|
addRequest(page);
|
||||||
sleep(site.getSleepTime());
|
sleep(site.getSleepTime());
|
||||||
|
@ -365,9 +372,15 @@ public class Spider implements Runnable, Task {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void stop() {
|
public void stop() {
|
||||||
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
|
if (stat.compareAndSet(STAT_RUNNING, STAT_STOPPED)) {
|
||||||
|
if (executorService != null) {
|
||||||
executorService.shutdown();
|
executorService.shutdown();
|
||||||
}
|
}
|
||||||
|
logger.info("Spider " + getUUID() + " stop success!");
|
||||||
|
} else {
|
||||||
|
logger.info("Spider " + getUUID() + " stop fail!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void stopAndDestroy() {
|
public void stopAndDestroy() {
|
||||||
stop();
|
stop();
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package us.codecraft.webmagic.utils;
|
package us.codecraft.webmagic.utils;
|
||||||
|
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.SynchronousQueue;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@ -12,21 +12,10 @@ import java.util.concurrent.TimeUnit;
|
||||||
public class ThreadUtils {
|
public class ThreadUtils {
|
||||||
|
|
||||||
public static ExecutorService newFixedThreadPool(int threadSize) {
|
public static ExecutorService newFixedThreadPool(int threadSize) {
|
||||||
return new ThreadPoolExecutor(threadSize, threadSize, 0L, TimeUnit.MILLISECONDS,
|
if (threadSize <= 1) {
|
||||||
new LinkedBlockingQueue<Runnable>(1) {
|
throw new IllegalArgumentException("ThreadSize must be greater than 1!");
|
||||||
|
|
||||||
private static final long serialVersionUID = -9028058603126367678L;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean offer(Runnable e) {
|
|
||||||
try {
|
|
||||||
put(e);
|
|
||||||
return true;
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
}
|
}
|
||||||
return false;
|
return new ThreadPoolExecutor(threadSize - 1, threadSize - 1, 0L, TimeUnit.MILLISECONDS,
|
||||||
}
|
new SynchronousQueue<Runnable>(), new ThreadPoolExecutor.CallerRunsPolicy());
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,11 +18,12 @@ public class SpiderTest {
|
||||||
public void process(ResultItems resultItems, Task task) {
|
public void process(ResultItems resultItems, Task task) {
|
||||||
System.out.println(1);
|
System.out.println(1);
|
||||||
}
|
}
|
||||||
});
|
}).thread(2);
|
||||||
spider.start();
|
spider.start();
|
||||||
Thread.sleep(10000);
|
Thread.sleep(10000);
|
||||||
spider.stop();
|
spider.stop();
|
||||||
// spider.run();
|
Thread.sleep(10000);
|
||||||
|
spider.start();
|
||||||
Thread.sleep(10000);
|
Thread.sleep(10000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue