fix compile error

master
yihua.huang 2013-06-18 18:02:30 +08:00
parent 019353b41e
commit 312e1bce87
6 changed files with 18 additions and 20 deletions

View File

@ -44,13 +44,8 @@ public class FileCacheQueueSchedular implements Schedular {
private Set<String> urls; private Set<String> urls;
public FileCacheQueueSchedular(Task task) { public FileCacheQueueSchedular(String filePath) {
this.task = task;
}
public FileCacheQueueSchedular(Task task, String filePath) {
this.filePath = filePath; this.filePath = filePath;
this.task = task;
} }
private void flush() { private void flush() {
@ -58,7 +53,8 @@ public class FileCacheQueueSchedular implements Schedular {
fileCursorWriter.flush(); fileCursorWriter.flush();
} }
private void init() { private void init(Task task) {
this.task = task;
File file = new File(filePath); File file = new File(filePath);
if (!file.exists()) { if (!file.exists()) {
file.mkdirs(); file.mkdirs();
@ -127,7 +123,7 @@ public class FileCacheQueueSchedular implements Schedular {
@Override @Override
public synchronized void push(Request request, Task task) { public synchronized void push(Request request, Task task) {
if (!inited.get()) { if (!inited.get()) {
init(); init(task);
} }
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {
logger.debug("push to queue " + request.getUrl()); logger.debug("push to queue " + request.getUrl());
@ -142,7 +138,7 @@ public class FileCacheQueueSchedular implements Schedular {
@Override @Override
public synchronized Request poll(Task task) { public synchronized Request poll(Task task) {
if (!inited.get()) { if (!inited.get()) {
init(); init(task);
} }
fileCursorWriter.println(cursor.incrementAndGet()); fileCursorWriter.println(cursor.incrementAndGet());
return queue.poll(); return queue.poll();

View File

@ -5,10 +5,12 @@ import freemarker.template.Template;
import freemarker.template.TemplateException; import freemarker.template.TemplateException;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.UrlUtils;
import java.io.*; import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
/** /**
* Author: code4crafter@gmail.com * Author: code4crafter@gmail.com
@ -37,10 +39,8 @@ public class FreemarkerPipeline implements Pipeline {
@Override @Override
public void process(Page page, Site site) { public void process(Page page, Task task) {
String domain = site.getDomain(); String path = this.path + "" + task.getUUID() + "/";
domain = UrlUtils.getDomain(domain);
String path = this.path + "" + domain + "/";
File file = new File(path); File file = new File(path);
if (!file.exists()) { if (!file.exists()) {
file.mkdirs(); file.mkdirs();

View File

@ -31,7 +31,7 @@ public class SpiderTest {
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html"); SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
System.out.println(pageProcessor2.getSite().getEncoding()); System.out.println(pageProcessor2.getSite().getEncoding());
pageProcessor2.getSite().setSleepTime(500); pageProcessor2.getSite().setSleepTime(500);
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")). Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(pageProcessor2).run(); processor(pageProcessor2).run();

View File

@ -30,7 +30,7 @@ public class DiandianProcessorTest {
//ConsolePipeline输出结果到控制台 //ConsolePipeline输出结果到控制台
//FileCacheQueueSchedular保存url支持断点续传临时文件输出到/data/temp/webmagic/cache目录 //FileCacheQueueSchedular保存url支持断点续传临时文件输出到/data/temp/webmagic/cache目录
//Spider.run()执行 //Spider.run()执行
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")). Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(diaoyuwengProcessor).run(); processor(diaoyuwengProcessor).run();
} }
} }

View File

@ -1,5 +1,6 @@
package us.codecraft.webmagic.processor; package us.codecraft.webmagic.processor;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline; import us.codecraft.webmagic.pipeline.FilePipeline;
@ -16,11 +17,12 @@ import java.io.IOException;
*/ */
public class DiaoyuwengProcessorTest { public class DiaoyuwengProcessorTest {
@Ignore
@Test @Test
public void test() throws IOException { public void test() throws IOException {
DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor(); DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor();
FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl"); FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl");
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")). Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(diaoyuwengProcessor).run(); processor(diaoyuwengProcessor).run();
} }
} }

View File

@ -30,7 +30,7 @@ public class SinablogProcessorTest {
//ConsolePipeline输出结果到控制台 //ConsolePipeline输出结果到控制台
//FileCacheQueueSchedular保存url支持断点续传临时文件输出到/data/temp/webmagic/cache目录 //FileCacheQueueSchedular保存url支持断点续传临时文件输出到/data/temp/webmagic/cache目录
//Spider.run()执行 //Spider.run()执行
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(sinaBlogProcesser.getSite(), "/data/temp/webmagic/cache/")). Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(sinaBlogProcesser).run(); processor(sinaBlogProcesser).run();
} }
} }