fix compile error
parent
019353b41e
commit
312e1bce87
|
@ -44,13 +44,8 @@ public class FileCacheQueueSchedular implements Schedular {
|
||||||
|
|
||||||
private Set<String> urls;
|
private Set<String> urls;
|
||||||
|
|
||||||
public FileCacheQueueSchedular(Task task) {
|
public FileCacheQueueSchedular(String filePath) {
|
||||||
this.task = task;
|
|
||||||
}
|
|
||||||
|
|
||||||
public FileCacheQueueSchedular(Task task, String filePath) {
|
|
||||||
this.filePath = filePath;
|
this.filePath = filePath;
|
||||||
this.task = task;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void flush() {
|
private void flush() {
|
||||||
|
@ -58,7 +53,8 @@ public class FileCacheQueueSchedular implements Schedular {
|
||||||
fileCursorWriter.flush();
|
fileCursorWriter.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init() {
|
private void init(Task task) {
|
||||||
|
this.task = task;
|
||||||
File file = new File(filePath);
|
File file = new File(filePath);
|
||||||
if (!file.exists()) {
|
if (!file.exists()) {
|
||||||
file.mkdirs();
|
file.mkdirs();
|
||||||
|
@ -127,7 +123,7 @@ public class FileCacheQueueSchedular implements Schedular {
|
||||||
@Override
|
@Override
|
||||||
public synchronized void push(Request request, Task task) {
|
public synchronized void push(Request request, Task task) {
|
||||||
if (!inited.get()) {
|
if (!inited.get()) {
|
||||||
init();
|
init(task);
|
||||||
}
|
}
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("push to queue " + request.getUrl());
|
logger.debug("push to queue " + request.getUrl());
|
||||||
|
@ -142,7 +138,7 @@ public class FileCacheQueueSchedular implements Schedular {
|
||||||
@Override
|
@Override
|
||||||
public synchronized Request poll(Task task) {
|
public synchronized Request poll(Task task) {
|
||||||
if (!inited.get()) {
|
if (!inited.get()) {
|
||||||
init();
|
init(task);
|
||||||
}
|
}
|
||||||
fileCursorWriter.println(cursor.incrementAndGet());
|
fileCursorWriter.println(cursor.incrementAndGet());
|
||||||
return queue.poll();
|
return queue.poll();
|
||||||
|
|
|
@ -5,10 +5,12 @@ import freemarker.template.Template;
|
||||||
import freemarker.template.TemplateException;
|
import freemarker.template.TemplateException;
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.utils.UrlUtils;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.File;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Author: code4crafter@gmail.com
|
* Author: code4crafter@gmail.com
|
||||||
|
@ -37,10 +39,8 @@ public class FreemarkerPipeline implements Pipeline {
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process(Page page, Site site) {
|
public void process(Page page, Task task) {
|
||||||
String domain = site.getDomain();
|
String path = this.path + "" + task.getUUID() + "/";
|
||||||
domain = UrlUtils.getDomain(domain);
|
|
||||||
String path = this.path + "" + domain + "/";
|
|
||||||
File file = new File(path);
|
File file = new File(path);
|
||||||
if (!file.exists()) {
|
if (!file.exists()) {
|
||||||
file.mkdirs();
|
file.mkdirs();
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class SpiderTest {
|
||||||
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
||||||
System.out.println(pageProcessor2.getSite().getEncoding());
|
System.out.println(pageProcessor2.getSite().getEncoding());
|
||||||
pageProcessor2.getSite().setSleepTime(500);
|
pageProcessor2.getSite().setSleepTime(500);
|
||||||
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")).
|
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||||
processor(pageProcessor2).run();
|
processor(pageProcessor2).run();
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ public class DiandianProcessorTest {
|
||||||
//ConsolePipeline输出结果到控制台
|
//ConsolePipeline输出结果到控制台
|
||||||
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
||||||
//Spider.run()执行
|
//Spider.run()执行
|
||||||
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
|
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||||
processor(diaoyuwengProcessor).run();
|
processor(diaoyuwengProcessor).run();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package us.codecraft.webmagic.processor;
|
package us.codecraft.webmagic.processor;
|
||||||
|
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Spider;
|
import us.codecraft.webmagic.Spider;
|
||||||
import us.codecraft.webmagic.pipeline.FilePipeline;
|
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||||
|
@ -16,11 +17,12 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public class DiaoyuwengProcessorTest {
|
public class DiaoyuwengProcessorTest {
|
||||||
|
|
||||||
|
@Ignore
|
||||||
@Test
|
@Test
|
||||||
public void test() throws IOException {
|
public void test() throws IOException {
|
||||||
DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor();
|
DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor();
|
||||||
FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl");
|
FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl");
|
||||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
|
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||||
processor(diaoyuwengProcessor).run();
|
processor(diaoyuwengProcessor).run();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ public class SinablogProcessorTest {
|
||||||
//ConsolePipeline输出结果到控制台
|
//ConsolePipeline输出结果到控制台
|
||||||
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
|
||||||
//Spider.run()执行
|
//Spider.run()执行
|
||||||
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(sinaBlogProcesser.getSite(), "/data/temp/webmagic/cache/")).
|
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
|
||||||
processor(sinaBlogProcesser).run();
|
processor(sinaBlogProcesser).run();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue