tags;
* }
-
+ *
* And start the spider by:
*
* OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
* ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
* }
-
+ *
+ *
* @author code4crafter@gmail.com
* @since 0.2.0
*/
-public class OOSpider extends Spider {
+public class OOSpider extends Spider {
private ModelPageProcessor modelPageProcessor;
private ModelPipeline modelPipeline;
+ private PageModelPipeline pageModelPipeline;
+
+ private List pageModelClasses = new ArrayList();
+
protected OOSpider(ModelPageProcessor modelPageProcessor) {
super(modelPageProcessor);
this.modelPageProcessor = modelPageProcessor;
@@ -49,6 +59,7 @@ public class OOSpider extends Spider {
/**
* create a spider
+ *
* @param site
* @param pageModelPipeline
* @param pageModels
@@ -57,13 +68,19 @@ public class OOSpider extends Spider {
this(ModelPageProcessor.create(site, pageModels));
this.modelPipeline = new ModelPipeline();
super.addPipeline(modelPipeline);
- if (pageModelPipeline!=null){
- for (Class pageModel : pageModels) {
+ for (Class pageModel : pageModels) {
+ if (pageModelPipeline != null) {
this.modelPipeline.put(pageModel, pageModelPipeline);
}
+ pageModelClasses.add(pageModel);
}
}
+    /**
+     * Builds the collector pipeline for this spider. The pipeline is bound to
+     * the first entry of {@code pageModelClasses} only.
+     *
+     * @return a new {@code PageModelCollectorPipeline} for the first page model class
+     * @throws IllegalStateException if no page model class was registered
+     */
+    @Override
+    protected CollectorPipeline getCollectorPipeline() {
+        if (pageModelClasses.isEmpty()) {
+            // Fail with a descriptive message rather than the bare
+            // IndexOutOfBoundsException that get(0) would raise on an empty list.
+            throw new IllegalStateException("OOSpider has no page model class to collect results for");
+        }
+        return new PageModelCollectorPipeline(pageModelClasses.get(0));
+    }
+
/**
 * Creates an OOSpider for the given page models without a page model pipeline.
 * Delegates to the three-argument constructor, passing {@code null} as the
 * pipeline argument.
 *
 * @param site       forwarded unchanged to the constructor
 * @param pageModels page model classes forwarded unchanged to the constructor
 * @return the newly constructed spider
 */
public static OOSpider create(Site site, Class... pageModels) {
return new OOSpider(site, null, pageModels);
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelCollectorPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelCollectorPipeline.java
new file mode 100644
index 0000000..b61f112
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelCollectorPipeline.java
@@ -0,0 +1,46 @@
+package us.codecraft.webmagic.model;
+
+import us.codecraft.webmagic.ResultItems;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.model.annotation.ExtractBy;
+import us.codecraft.webmagic.pipeline.CollectorPageModelPipeline;
+import us.codecraft.webmagic.pipeline.CollectorPipeline;
+
+import java.lang.annotation.Annotation;
+import java.util.List;
+
+/**
+ * @author code4crafter@gmail.com
+ * @since 0.4.0
+ */
+class PageModelCollectorPipeline implements CollectorPipeline {
+
+ private final CollectorPageModelPipeline classPipeline = new CollectorPageModelPipeline();
+
+ private final Class> clazz;
+
+ PageModelCollectorPipeline(Class> clazz) {
+ this.clazz = clazz;
+ }
+
+ @Override
+ public List getCollected() {
+ return classPipeline.getCollected();
+ }
+
+ @Override
+ public synchronized void process(ResultItems resultItems, Task task) {
+ Object o = resultItems.get(clazz.getCanonicalName());
+ if (o != null) {
+ Annotation annotation = clazz.getAnnotation(ExtractBy.class);
+ if (annotation == null || !((ExtractBy) annotation).multi()) {
+ classPipeline.process((T) o, task);
+ } else {
+ List