diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java
new file mode 100644
index 0000000..dd9ace2
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java
@@ -0,0 +1,20 @@
+package us.codecraft.webmagic.model;
+
+/**
+ * Marks a model that supplies its own key; on output the model is identified by {@link #key()}
+ * (for example, JsonFilePageModelPipeline uses it as the persisted file name).
+ * If persisted file names come out garbled, add LANG=zh_CN.UTF-8 to the runtime environment.
+ *
+ * @author code4crafter@gmail.com
+ * Date: 13-8-10
+ * Time: 7:39 AM
+ */
+public interface HasKey {
+
+    /**
+     * Key that identifies this model on output (e.g. the file name in JsonFilePageModelPipeline).
+     *
+     * @return key
+     */
+    String key();
+}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java
new file mode 100644
index 0000000..a6b73cc
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java
@@ -0,0 +1,70 @@
+package us.codecraft.webmagic.pipeline;
+
+import com.alibaba.fastjson.JSON;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.log4j.Logger;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.model.HasKey;
+import us.codecraft.webmagic.model.PageModelPipeline;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+/**
+ * Pipeline that persists each extracted model as a UTF-8 JSON file under {@code <path><task UUID>/}.
+ * If persisted file names come out garbled, add LANG=zh_CN.UTF-8 to the runtime environment.
+ *
+ * @author code4crafter@gmail.com
+ * Date: 13-4-21
+ * Time: 6:28 PM
+ */
+public class JsonFilePageModelPipeline implements PageModelPipeline {
+
+    private String path = "/data/webmagic/";
+
+    private Logger logger = Logger.getLogger(getClass());
+
+    /** Creates a pipeline that uses the default save path "/data/webmagic/". */
+    public JsonFilePageModelPipeline() {
+    }
+
+    /**
+     * Creates a pipeline that saves below the given directory.
+     *
+     * @param path directory to save files in; a trailing "/" is appended when it is missing
+     */
+    public JsonFilePageModelPipeline(String path) {
+        if (!path.endsWith("/") && !path.endsWith("\\")) {
+            path += "/";
+        }
+        this.path = path;
+    }
+
+    /**
+     * Writes {@code o} as JSON to {@code <path><task UUID>/<name>.json}; the name is
+     * {@link HasKey#key()} for HasKey models, otherwise the MD5 hex of the reflective toString.
+     * I/O failures while opening the file are logged as warnings and not rethrown.
+     */
+    @Override
+    public void process(Object o, Task task) {
+        // this.path already ends with a separator, so none is inserted before the UUID.
+        String dir = this.path + task.getUUID() + "/";
+        new File(dir).mkdirs(); // returns false without side effects when the path already exists
+        String name = (o instanceof HasKey)
+                ? ((HasKey) o).key() : DigestUtils.md5Hex(ToStringBuilder.reflectionToString(o));
+        try {
+            // Explicit UTF-8 rather than the platform default charset FileWriter would pick.
+            PrintWriter printWriter = new PrintWriter(dir + name + ".json", "UTF-8");
+            try {
+                printWriter.write(JSON.toJSONString(o));
+            } finally {
+                printWriter.close(); // also runs when serialization throws, so no handle leaks
+            }
+        } catch (IOException e) {
+            logger.warn("write file error", e);
+        }
+    }
+}
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
index c1e3ea3..a76fd88 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
@@ -1,10 +1,11 @@
package us.codecraft.webmagic.model.samples;
import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.model.ConsolePageModelPipeline;
+import us.codecraft.webmagic.model.HasKey;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
+import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
import java.util.List;
@@ -14,7 +15,7 @@ import java.util.List;
* Time: 上午7:52
*/
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
-public class OschinaBlog {
+public class OschinaBlog implements HasKey{
@ExtractBy("//title")
private String title;
@@ -27,7 +28,23 @@ public class OschinaBlog {
public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
- ,new ConsolePageModelPipeline(), OschinaBlog.class).run();
+ ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
}
+    /** Returns the value of the {@code title} field. */
+    public String getTitle() {
+        return this.title;
+    }
+
+    /** Returns the value of the {@code content} field. */
+    public String getContent() {
+        return this.content;
+    }
+
+    /** Returns the value of the {@code tags} field. */
+    public List getTags() {
+        return this.tags;
+    }
+
+    /** Uses the {@code title} field as this model's key. */
+    @Override
+    public String key() {
+        return this.title;
+    }
}