/*
 * Recovered from the patch "add FilePageModelPipeline"
 * (commit 478ace7e973d5ae924ed3345722cf4ef143c0df8, "yihua.huang",
 * Thu, 22 Aug 2013 07:29:18 +0800), which creates
 * webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java
 */
package us.codecraft.webmagic.pipeline;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.HasKey;
import us.codecraft.webmagic.model.PageModelPipeline;
import us.codecraft.webmagic.utils.FilePersistentBase;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/**
 * Store results objects (page models) to files in plain format.<br>
 * Use model.key() as file name if the model implements HasKey.<br>
 * Otherwise use the MD5 hex digest of the model's reflective string form as file name.
 *
 * @author code4crafter@gmail.com
 * @since 0.2.2
 */
public class FilePageModelPipeline extends FilePersistentBase implements PageModelPipeline {

    /** Path used by the no-argument constructor. */
    private static final String DEFAULT_PATH = "/data/webmagic/";

    /** Charset used for the written files, so output does not depend on the platform default. */
    private static final String CHARSET = "UTF-8";

    private final Logger logger = Logger.getLogger(getClass());

    /**
     * new FilePageModelPipeline with default path "/data/webmagic/"
     */
    public FilePageModelPipeline() {
        setPath(DEFAULT_PATH);
    }

    /**
     * new FilePageModelPipeline storing files under the given path
     *
     * @param path base directory, passed to {@code setPath}
     */
    public FilePageModelPipeline(String path) {
        setPath(path);
    }

    /**
     * Writes the reflective string form of {@code o} to
     * {@code <path>/<task UUID>/<name>.html}, where {@code name} is
     * {@code ((HasKey) o).key()} when {@code o} implements {@link HasKey} and the MD5 hex
     * digest of the written content otherwise.
     * <p>
     * I/O failures are logged at WARN level and not rethrown.
     * <p>
     * NOTE(review): the key is used as a file name without sanitizing; confirm keys cannot
     * contain path separators. The reflective rendering may also contain identity-dependent
     * data, in which case the digest-based name is not stable across runs - confirm.
     *
     * @param o    the page model to store
     * @param task the task whose UUID names the sub-directory
     */
    @Override
    public void process(Object o, Task task) {
        String path = this.path + "/" + task.getUUID() + "/";
        try {
            String filename;
            String content;
            if (o instanceof HasKey) {
                filename = path + ((HasKey) o).key() + ".html";
                content = ToStringBuilder.reflectionToString(o);
            } else {
                // Render once: the same string is both the digest input and the file content.
                content = ToStringBuilder.reflectionToString(o);
                filename = path + DigestUtils.md5Hex(content) + ".html";
            }
            PrintWriter printWriter = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(getFile(filename)), CHARSET));
            try {
                printWriter.write(content);
            } finally {
                // Always release the file handle, even if write() fails unexpectedly.
                printWriter.close();
            }
            // PrintWriter swallows IOExceptions; its error flag is the only way to detect them.
            if (printWriter.checkError()) {
                logger.warn("write file error: " + filename);
            }
        } catch (IOException e) {
            logger.warn("write file error", e);
        }
    }
}