From 312e1bce87981301dde04cfb7a61a04d887ad973 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Tue, 18 Jun 2013 18:02:30 +0800 Subject: [PATCH] fix compile error --- .../schedular/FileCacheQueueSchedular.java | 14 +++++--------- .../webmagic/pipeline/FreemarkerPipeline.java | 14 +++++++------- .../java/us/codecraft/webmagic/SpiderTest.java | 2 +- .../webmagic/processor/DiandianProcessorTest.java | 2 +- .../processor/DiaoyuwengProcessorTest.java | 4 +++- .../webmagic/processor/SinablogProcessorTest.java | 2 +- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueSchedular.java b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueSchedular.java index e9d4adb..882f498 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueSchedular.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueSchedular.java @@ -44,13 +44,8 @@ public class FileCacheQueueSchedular implements Schedular { private Set urls; - public FileCacheQueueSchedular(Task task) { - this.task = task; - } - - public FileCacheQueueSchedular(Task task, String filePath) { + public FileCacheQueueSchedular(String filePath) { this.filePath = filePath; - this.task = task; } private void flush() { @@ -58,7 +53,8 @@ public class FileCacheQueueSchedular implements Schedular { fileCursorWriter.flush(); } - private void init() { + private void init(Task task) { + this.task = task; File file = new File(filePath); if (!file.exists()) { file.mkdirs(); @@ -127,7 +123,7 @@ public class FileCacheQueueSchedular implements Schedular { @Override public synchronized void push(Request request, Task task) { if (!inited.get()) { - init(); + init(task); } if (logger.isDebugEnabled()) { logger.debug("push to queue " + request.getUrl()); @@ -142,7 +138,7 @@ public class FileCacheQueueSchedular implements Schedular { @Override public synchronized Request poll(Task task) { if (!inited.get()) { - init(); + init(task); } fileCursorWriter.println(cursor.incrementAndGet()); return queue.poll(); diff --git a/webmagic-plugin/src/main/java/us/codecraft/webmagic/pipeline/FreemarkerPipeline.java b/webmagic-plugin/src/main/java/us/codecraft/webmagic/pipeline/FreemarkerPipeline.java index 9afebb6..1121971 100644 --- a/webmagic-plugin/src/main/java/us/codecraft/webmagic/pipeline/FreemarkerPipeline.java +++ b/webmagic-plugin/src/main/java/us/codecraft/webmagic/pipeline/FreemarkerPipeline.java @@ -5,10 +5,12 @@ import freemarker.template.Template; import freemarker.template.TemplateException; import org.apache.commons.codec.digest.DigestUtils; import us.codecraft.webmagic.Page; -import us.codecraft.webmagic.Site; -import us.codecraft.webmagic.utils.UrlUtils; +import us.codecraft.webmagic.Task; -import java.io.*; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; /** * Author: code4crafter@gmail.com @@ -37,10 +39,8 @@ public class FreemarkerPipeline implements Pipeline { @Override - public void process(Page page, Site site) { - String domain = site.getDomain(); - domain = UrlUtils.getDomain(domain); - String path = this.path + "" + domain + "/"; + public void process(Page page, Task task) { + String path = this.path + "" + task.getUUID() + "/"; File file = new File(path); if (!file.exists()) { file.mkdirs(); diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java index 79f428e..b67ffc4 100644 --- a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java +++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java @@ -31,7 +31,7 @@ public class SpiderTest { SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html"); System.out.println(pageProcessor2.getSite().getEncoding()); pageProcessor2.getSite().setSleepTime(500); - Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")). + Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")). processor(pageProcessor2).run(); diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java index 74de9c0..721a5eb 100644 --- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java +++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java @@ -30,7 +30,7 @@ public class DiandianProcessorTest { //ConsolePipeline输出结果到控制台 //FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录 //Spider.run()执行 - Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")). + Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")). processor(diaoyuwengProcessor).run(); } } diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java index 0c24b57..8f03b6a 100644 --- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java +++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java @@ -1,5 +1,6 @@ package us.codecraft.webmagic.processor; +import org.junit.Ignore; import org.junit.Test; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.pipeline.FilePipeline; @@ -16,11 +17,12 @@ import java.io.IOException; */ public class DiaoyuwengProcessorTest { + @Ignore @Test public void test() throws IOException { DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor(); FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl"); - Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")). + Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")). processor(diaoyuwengProcessor).run(); } } diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java index 72e6be0..8fb2583 100644 --- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java +++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java @@ -30,7 +30,7 @@ public class SinablogProcessorTest { //ConsolePipeline输出结果到控制台 //FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录 //Spider.run()执行 - Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(sinaBlogProcesser.getSite(), "/data/temp/webmagic/cache/")). + Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")). processor(sinaBlogProcesser).run(); } }