commit
4e65dac249
|
@ -4,12 +4,14 @@ import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.http.annotation.ThreadSafe;
|
import org.apache.http.annotation.ThreadSafe;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import us.codecraft.webmagic.ResultItems;
|
import us.codecraft.webmagic.ResultItems;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.utils.FilePersistentBase;
|
import us.codecraft.webmagic.utils.FilePersistentBase;
|
||||||
|
|
||||||
import java.io.FileWriter;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -39,7 +41,7 @@ public class FilePipeline extends FilePersistentBase implements Pipeline {
|
||||||
public void process(ResultItems resultItems, Task task) {
|
public void process(ResultItems resultItems, Task task) {
|
||||||
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
|
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
|
||||||
try {
|
try {
|
||||||
PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".html")));
|
PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".html")),"UTF-8"));
|
||||||
printWriter.println("url:\t" + resultItems.getRequest().getUrl());
|
printWriter.println("url:\t" + resultItems.getRequest().getUrl());
|
||||||
for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
|
for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
|
||||||
if (entry.getValue() instanceof Iterable) {
|
if (entry.getValue() instanceof Iterable) {
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
package us.codecraft.webmagic.pipeline;
|
||||||
|
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.ResultItems;
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.Task;
|
||||||
|
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by ywooer on 2014/5/6 0006.
|
||||||
|
*/
|
||||||
|
public class FilePipelineTest {
|
||||||
|
|
||||||
|
private static ResultItems resultItems;
|
||||||
|
private static Task task;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void before() {
|
||||||
|
resultItems = new ResultItems();
|
||||||
|
resultItems.put("content", "webmagic 爬虫工具");
|
||||||
|
Request request = new Request("http://www.baidu.com");
|
||||||
|
resultItems.setRequest(request);
|
||||||
|
|
||||||
|
task = new Task() {
|
||||||
|
@Override
|
||||||
|
public String getUUID() {
|
||||||
|
return UUID.randomUUID().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Site getSite() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testProcess() {
|
||||||
|
FilePipeline filePipeline = new FilePipeline();
|
||||||
|
filePipeline.process(resultItems, task);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue