getOtherPages();
+
+ /**
+ * Combines multiPageModels into one whole object.
+ *
+ * @param multiPageModel the multiPageModel to combine
+ * @return the combined multiPageModel
+ */
public MultiPageModel combine(MultiPageModel multiPageModel);
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java
index 3927d11..5b74309 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java
@@ -6,8 +6,6 @@ import us.codecraft.webmagic.Page;
* 实现这个接口即可在抽取后进行后处理。
*
* @author code4crafter@gmail.com
- * Date: 13-8-3
- * Time: 上午9:42
*/
public interface AfterExtractor {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java
index 977dcde..e04a30d 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java
@@ -5,10 +5,26 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
/**
- * 基于Model的Spider,封装后的入口类。
+ * The spider for the page model extractor.
+ * In webmagic, we call a POJO containing the extraction result a "page model".
+ * You can customize a crawler by writing a page model with annotations.
+ * Such as:
+ * <pre>
+ * {@literal @}TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
+ * public class OschinaBlog{
+ *
+ * {@literal @}ExtractBy("//title")
+ * private String title;
+ *
+ * {@literal @}ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css)
+ * private String content;
+ *
+ * {@literal @}ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
+ * private List<String> tags;
+ * }
+ * </pre>
* @author code4crafter@gmail.com
- * Date: 13-8-3
- * Time: 上午9:51
+ * @since 0.2.0
*/
public class OOSpider extends Spider {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/package.html b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/package.html
index d62cc00..63a6784 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/package.html
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/package.html
@@ -1,5 +1,5 @@
-webmagic对抓取器编写的面向模型(称为PageModel)的封装。基于POJO及注解即可实现一个PageProcessor。
+Page model and annotations used to customize a crawler.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
index a7d8378..d053716 100755
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
@@ -2,7 +2,6 @@ package us.codecraft.webmagic.utils;
/**
* @author code4crafter@gmail.com
- * Date Dec 14, 2012
*/
import java.util.HashMap;