diff --git a/release-note.md b/release-note.md index ae5fc56..f44704e 100755 --- a/release-note.md +++ b/release-note.md @@ -1,6 +1,6 @@ Release Notes ---- -See old versions in [https://github.com/code4craft/webmagic/releases](https://github.com/code4craft/webmagic/releases) +See latest versions in [https://github.com/code4craft/webmagic/releases](https://github.com/code4craft/webmagic/releases) *2012-9-4* `version:0.3.0` diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java index 96ff24e..003c573 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/BaiduBaike.java @@ -46,4 +46,12 @@ public class BaiduBaike{ } ooSpider.close(); } + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index b646b0f..2ded0bd 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -1,5 +1,6 @@ package us.codecraft.webmagic.scheduler; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.log4j.Logger; import us.codecraft.webmagic.Request; @@ -94,6 +95,9 @@ public class FileCacheQueueScheduler implements Scheduler { urls = new LinkedHashSet(); readCursorFile(); readUrlFile(); + } catch (FileNotFoundException e) { + //init + logger.info("init cache file " + getFileName(fileUrlAllName)); } catch (IOException e) { logger.error("init file error", e); } @@ -101,23 +105,37 @@ public class 
FileCacheQueueScheduler implements Scheduler { private void readUrlFile() throws IOException { String line; - BufferedReader fileUrlReader = new BufferedReader(new FileReader(getFileName(fileUrlAllName))); - int lineReaded = 0; - while ((line = fileUrlReader.readLine()) != null) { - urls.add(line.trim()); - lineReaded++; - if (lineReaded > cursor.get()) { - queue.add(new Request(line)); + BufferedReader fileUrlReader = null; + try { + fileUrlReader = new BufferedReader(new FileReader(getFileName(fileUrlAllName))); + int lineReaded = 0; + while ((line = fileUrlReader.readLine()) != null) { + urls.add(line.trim()); + lineReaded++; + if (lineReaded > cursor.get()) { + queue.add(new Request(line)); + } + } + } finally { + if (fileUrlReader != null) { + IOUtils.closeQuietly(fileUrlReader); } } } private void readCursorFile() throws IOException { - BufferedReader fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor))); - String line; - //read the last number - while ((line = fileCursorReader.readLine()) != null) { - cursor = new AtomicInteger(NumberUtils.toInt(line)); + BufferedReader fileCursorReader = null; + try { + fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor))); + String line; + //read the last number + while ((line = fileCursorReader.readLine()) != null) { + cursor = new AtomicInteger(NumberUtils.toInt(line)); + } + } finally { + if (fileCursorReader != null) { + IOUtils.closeQuietly(fileCursorReader); + } } }