#41 add getThreadAlive(), getStatus(), getPageCount() to Spider
parent
cf62d707e0
commit
c2d6d495b3
|
@ -6,9 +6,9 @@ import org.apache.log4j.Logger;
|
|||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||
import us.codecraft.webmagic.pipeline.CollectorPipeline;
|
||||
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
|
||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.scheduler.QueueScheduler;
|
||||
import us.codecraft.webmagic.scheduler.Scheduler;
|
||||
|
@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.Condition;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
|
@ -100,6 +98,8 @@ public class Spider implements Runnable, Task {
|
|||
|
||||
private final AtomicInteger threadAlive = new AtomicInteger(0);
|
||||
|
||||
private final AtomicLong pageCount = new AtomicLong(0);
|
||||
|
||||
/**
|
||||
* create a spider with pageProcessor.
|
||||
*
|
||||
|
@ -306,6 +306,7 @@ public class Spider implements Runnable, Task {
|
|||
logger.error("download " + requestFinal + " error", e);
|
||||
} finally {
|
||||
threadAlive.decrementAndGet();
|
||||
pageCount.incrementAndGet();
|
||||
signalNewUrl();
|
||||
}
|
||||
}
|
||||
|
@ -566,6 +567,61 @@ public class Spider implements Runnable, Task {
|
|||
return spawnUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get page count downloaded by spider.
|
||||
*
|
||||
* @return total downloaded page count
|
||||
* @since 0.4.1
|
||||
*/
|
||||
public long getPageCount() {
|
||||
return pageCount.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get running status by spider.
|
||||
*
|
||||
* @return running status
|
||||
* @see Status
|
||||
* @since 0.4.1
|
||||
*/
|
||||
public Status getStatus(){
|
||||
return Status.fromValue(stat.get());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Running status of a spider. Each constant carries the integer code
 * returned by {@link #getValue()} and accepted by {@link #fromValue(int)}.
 */
public enum Status {
    Init(0), Running(1), Stopped(2);

    /** Integer code of this status; assigned once in the constructor. */
    private final int value;

    Status(int value) {
        this.value = value;
    }

    /**
     * @return the integer code of this status
     */
    int getValue() {
        return value;
    }

    /**
     * Look up the status that carries the given integer code.
     *
     * @param value integer code to look up
     * @return the matching status, or {@link #Init} when no constant has that code
     */
    public static Status fromValue(int value) {
        for (Status status : Status.values()) {
            if (status.getValue() == value) {
                return status;
            }
        }
        // Unknown codes fall back to Init (the default value) instead of failing.
        return Init;
    }
}
|
||||
|
||||
/**
|
||||
* Get thread count which is running
|
||||
* @return thread count which is running
|
||||
* @since 0.4.1
|
||||
*/
|
||||
public int getThreadAlive() {
|
||||
return threadAlive.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether to add the extracted urls for download.<br>
|
||||
* Add urls to download when it is true, and just download seed urls when it is false. <br>
|
||||
|
|
Loading…
Reference in New Issue