#41 add getThreadAlive(), getStatus(), getPageCount() to Spider
parent
cf62d707e0
commit
c2d6d495b3
|
@ -6,9 +6,9 @@ import org.apache.log4j.Logger;
|
||||||
import us.codecraft.webmagic.downloader.Downloader;
|
import us.codecraft.webmagic.downloader.Downloader;
|
||||||
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||||
import us.codecraft.webmagic.pipeline.CollectorPipeline;
|
import us.codecraft.webmagic.pipeline.CollectorPipeline;
|
||||||
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
|
|
||||||
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||||
|
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.webmagic.scheduler.QueueScheduler;
|
import us.codecraft.webmagic.scheduler.QueueScheduler;
|
||||||
import us.codecraft.webmagic.scheduler.Scheduler;
|
import us.codecraft.webmagic.scheduler.Scheduler;
|
||||||
|
@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.concurrent.locks.Condition;
|
import java.util.concurrent.locks.Condition;
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
@ -100,6 +98,8 @@ public class Spider implements Runnable, Task {
|
||||||
|
|
||||||
private final AtomicInteger threadAlive = new AtomicInteger(0);
|
private final AtomicInteger threadAlive = new AtomicInteger(0);
|
||||||
|
|
||||||
|
private final AtomicLong pageCount = new AtomicLong(0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a spider with pageProcessor.
|
* create a spider with pageProcessor.
|
||||||
*
|
*
|
||||||
|
@ -306,6 +306,7 @@ public class Spider implements Runnable, Task {
|
||||||
logger.error("download " + requestFinal + " error", e);
|
logger.error("download " + requestFinal + " error", e);
|
||||||
} finally {
|
} finally {
|
||||||
threadAlive.decrementAndGet();
|
threadAlive.decrementAndGet();
|
||||||
|
pageCount.incrementAndGet();
|
||||||
signalNewUrl();
|
signalNewUrl();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -566,6 +567,61 @@ public class Spider implements Runnable, Task {
|
||||||
return spawnUrl;
|
return spawnUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get page count downloaded by spider.
|
||||||
|
*
|
||||||
|
* @return total downloaded page count
|
||||||
|
* @since 0.4.1
|
||||||
|
*/
|
||||||
|
public long getPageCount() {
|
||||||
|
return pageCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get running status by spider.
|
||||||
|
*
|
||||||
|
* @return running status
|
||||||
|
* @see Status
|
||||||
|
* @since 0.4.1
|
||||||
|
*/
|
||||||
|
public Status getStatus(){
|
||||||
|
return Status.fromValue(stat.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public enum Status {
|
||||||
|
Init(0), Running(1), Stopped(2);
|
||||||
|
|
||||||
|
private Status(int value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int value;
|
||||||
|
|
||||||
|
int getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Status fromValue(int value) {
|
||||||
|
for (Status status : Status.values()) {
|
||||||
|
if (status.getValue() == value) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//default value
|
||||||
|
return Init;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get thread count which is running
|
||||||
|
* @return thread count which is running
|
||||||
|
* @since 0.4.1
|
||||||
|
*/
|
||||||
|
public int getThreadAlive() {
|
||||||
|
return threadAlive.get();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether add urls extracted to download.<br>
|
* Whether add urls extracted to download.<br>
|
||||||
* Add urls to download when it is true, and just download seed urls when it is false. <br>
|
* Add urls to download when it is true, and just download seed urls when it is false. <br>
|
||||||
|
|
Loading…
Reference in New Issue