add charset to Writer
parent
7fbe18b8c0
commit
26d38851b5
|
@ -4,12 +4,14 @@ import org.apache.commons.codec.digest.DigestUtils;
|
|||
import org.apache.http.annotation.ThreadSafe;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.utils.FilePersistentBase;
|
||||
|
||||
import java.io.FileWriter;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -39,7 +41,7 @@ public class FilePipeline extends FilePersistentBase implements Pipeline {
|
|||
public void process(ResultItems resultItems, Task task) {
|
||||
String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
|
||||
try {
|
||||
PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".html")));
|
||||
PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".html")),"UTF-8"));
|
||||
printWriter.println("url:\t" + resultItems.getRequest().getUrl());
|
||||
for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
|
||||
if (entry.getValue() instanceof Iterable) {
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
package us.codecraft.webmagic.pipeline;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Created by ywooer on 2014/5/6 0006.
|
||||
*/
|
||||
public class FilePipelineTest {
|
||||
|
||||
private static ResultItems resultItems;
|
||||
private static Task task;
|
||||
|
||||
@BeforeClass
|
||||
public static void before() {
|
||||
resultItems = new ResultItems();
|
||||
resultItems.put("content", "webmagic 爬虫工具");
|
||||
Request request = new Request("http://www.baidu.com");
|
||||
resultItems.setRequest(request);
|
||||
|
||||
task = new Task() {
|
||||
@Override
|
||||
public String getUUID() {
|
||||
return UUID.randomUUID().toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
@Test
|
||||
public void testProcess() {
|
||||
FilePipeline filePipeline = new FilePipeline();
|
||||
filePipeline.process(resultItems, task);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue