fix compile error
parent
019353b41e
commit
312e1bce87
|
@ -44,13 +44,8 @@ public class FileCacheQueueSchedular implements Schedular {
|
|||
|
||||
private Set<String> urls;
|
||||
|
||||
public FileCacheQueueSchedular(Task task) {
|
||||
this.task = task;
|
||||
}
|
||||
|
||||
public FileCacheQueueSchedular(Task task, String filePath) {
|
||||
public FileCacheQueueSchedular(String filePath) {
|
||||
this.filePath = filePath;
|
||||
this.task = task;
|
||||
}
|
||||
|
||||
private void flush() {
|
||||
|
@ -58,7 +53,8 @@ public class FileCacheQueueSchedular implements Schedular {
|
|||
fileCursorWriter.flush();
|
||||
}
|
||||
|
||||
private void init() {
|
||||
private void init(Task task) {
|
||||
this.task = task;
|
||||
File file = new File(filePath);
|
||||
if (!file.exists()) {
|
||||
file.mkdirs();
|
||||
|
@ -127,7 +123,7 @@ public class FileCacheQueueSchedular implements Schedular {
|
|||
@Override
|
||||
public synchronized void push(Request request, Task task) {
|
||||
if (!inited.get()) {
|
||||
init();
|
||||
init(task);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("push to queue " + request.getUrl());
|
||||
|
@ -142,7 +138,7 @@ public class FileCacheQueueSchedular implements Schedular {
|
|||
@Override
|
||||
public synchronized Request poll(Task task) {
|
||||
if (!inited.get()) {
|
||||
init();
|
||||
init(task);
|
||||
}
|
||||
fileCursorWriter.println(cursor.incrementAndGet());
|
||||
return queue.poll();
|
||||
|
|
|
@ -5,10 +5,12 @@ import freemarker.template.Template;
|
|||
import freemarker.template.TemplateException;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
/**
|
||||
* Author: code4crafter@gmail.com
|
||||
|
@ -37,10 +39,8 @@ public class FreemarkerPipeline implements Pipeline {
|
|||
|
||||
|
||||
@Override
|
||||
public void process(Page page, Site site) {
|
||||
String domain = site.getDomain();
|
||||
domain = UrlUtils.getDomain(domain);
|
||||
String path = this.path + "" + domain + "/";
|
||||
public void process(Page page, Task task) {
|
||||
String path = this.path + "" + task.getUUID() + "/";
|
||||
File file = new File(path);
|
||||
if (!file.exists()) {
|
||||
file.mkdirs();
|
||||
|
|
|
@ -31,7 +31,7 @@ public class SpiderTest {
|
|||
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
||||
System.out.println(pageProcessor2.getSite().getEncoding());
|
||||
pageProcessor2.getSite().setSleepTime(500);
|
||||
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")).
|
||||
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||
processor(pageProcessor2).run();
|
||||
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ public class DiandianProcessorTest {
|
|||
//ConsolePipeline输出结果到控制台
|
||||
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
||||
//Spider.run()执行
|
||||
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
|
||||
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||
processor(diaoyuwengProcessor).run();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package us.codecraft.webmagic.processor;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||
|
@ -16,11 +17,12 @@ import java.io.IOException;
|
|||
*/
|
||||
public class DiaoyuwengProcessorTest {
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void test() throws IOException {
|
||||
DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor();
|
||||
FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl");
|
||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
|
||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||
processor(diaoyuwengProcessor).run();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ public class SinablogProcessorTest {
|
|||
//ConsolePipeline输出结果到控制台
|
||||
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
||||
//Spider.run()执行
|
||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(sinaBlogProcesser.getSite(), "/data/temp/webmagic/cache/")).
|
||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||
processor(sinaBlogProcesser).run();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue