From 90e14b31b0c229d5664092ea01f739f264e419a8 Mon Sep 17 00:00:00 2001 From: x1ny Date: Thu, 12 Nov 2015 23:10:20 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3FileCacheQueueScheduler?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E7=A8=8B=E5=BA=8F=E4=B8=8D=E8=83=BD=E6=AD=A3?= =?UTF-8?q?=E5=B8=B8=E7=BB=93=E6=9D=9F=E5=92=8C=E6=9C=AA=E5=85=B3=E9=97=AD?= =?UTF-8?q?=E6=B5=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FileCacheQueueScheduler中开启了一个线程周期运行来保存数据但在爬虫结束后没有关闭导致程序无法结束,以及没有关闭io流。 解决方法: 让FileCacheQueueScheduler实现Closable接口,在close方法中关闭线程以及流。 在Spider的close方法中添加对scheduler的关闭操作。 --- .../main/java/us/codecraft/webmagic/Spider.java | 1 + .../scheduler/FileCacheQueueScheduler.java | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 7e499be..bf3343b 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -374,6 +374,7 @@ public class Spider implements Runnable, Task { public void close() { destroyEach(downloader); destroyEach(pageProcessor); + destroyEach(scheduler); for (Pipeline pipeline : pipelines) { destroyEach(pipeline); } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index 407200a..1582fc9 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -12,17 +12,19 @@ import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; + /** * Store urls and cursor in files so that a Spider can resume the status when shutdown.
* * @author code4crafter@gmail.com
* @since 0.2.0 */ -public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler { +public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler,Closeable { private String filePath = System.getProperty("java.io.tmpdir"); @@ -43,6 +45,8 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement private BlockingQueue queue; private Set urls; + + private ScheduledExecutorService flushThreadPool; public FileCacheQueueScheduler(String filePath) { if (!filePath.endsWith("/") && !filePath.endsWith("\\")) { @@ -94,7 +98,8 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement } private void initFlushThread() { - Executors.newScheduledThreadPool(1).scheduleAtFixedRate(new Runnable() { + flushThreadPool = Executors.newScheduledThreadPool(1); + flushThreadPool.scheduleAtFixedRate(new Runnable() { @Override public void run() { flush(); @@ -161,6 +166,12 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement } } } + + public void close() throws IOException { + flushThreadPool.shutdown(); + fileUrlWriter.close(); + fileCursorWriter.close(); + } private String getFileName(String filename) { return filePath + task.getUUID() + filename;