diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index 6a35178..4c7b992 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -96,11 +96,6 @@ public class Site {
* @return get domain
*/
public String getDomain() {
- if (domain == null) {
- if (startUrls.size() > 0) {
- domain = UrlUtils.getDomain(startUrls.get(0));
- }
- }
return domain;
}
@@ -176,6 +171,11 @@ public class Site {
*/
public Site addStartUrl(String startUrl) {
this.startUrls.add(startUrl);
+        // Resolve the domain once, from the first start URL. An element was just added to
+        // startUrls, so the list cannot be empty here and no size check is needed.
+        if (domain == null) {
+            domain = UrlUtils.getDomain(startUrls.get(0));
+        }
return this;
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/OschinaBlog.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/OschinaBlog.java
new file mode 100644
index 0000000..703d6a4
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/OschinaBlog.java
@@ -0,0 +1,83 @@
+package us.codecraft.webmagic.example;
+
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.model.AfterExtractor;
+import us.codecraft.webmagic.model.OOSpider;
+import us.codecraft.webmagic.model.annotation.ExtractBy;
+import us.codecraft.webmagic.model.annotation.Formatter;
+import us.codecraft.webmagic.model.annotation.TargetUrl;
+import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
+
+import java.util.List;
+
+/**
+ * Example page model for blog posts at {@code http://my.oschina.net/flashsword/blog/<digits>}.
+ * Each field is annotated with the {@link ExtractBy} rule that describes where its value
+ * comes from; {@link #main(String[])} runs a crawl that uses this class as its model.
+ *
+ * @author code4crafter@gmail.com
+ */
+@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
+public class OschinaBlog implements AfterExtractor {
+
+    /** Text of the page's {@code title} element. */
+    @ExtractBy("//title/text()")
+    private String title;
+
+    /** Content matched by the CSS selector {@code div.BlogContent}. */
+    @ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
+    private String content;
+
+    /** Link texts inside the {@code BlogTags} div (the rule is marked {@code multi = true}). */
+    @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
+    private List<String> tags;
+
+    /**
+     * Timestamp text matched inside the {@code BlogStat} div.
+     * NOTE(review): the format string is assumed to use SimpleDateFormat pattern letters, where
+     * {@code yyyy} is the calendar year and {@code YYYY} the week year - confirm against Formatter.
+     */
+    @Formatter("yyyy-MM-dd HH:mm")
+    @ExtractBy("//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')")
+    private String date;
+
+    /**
+     * Builds an {@link OOSpider} for this model, starting from the flashsword blog URL and using a
+     * {@link JsonFilePageModelPipeline} constructed with {@code "/data/webmagic/"}, then runs it.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        Site site = Site.me().addStartUrl("http://my.oschina.net/flashsword/blog");
+        OOSpider.create(site, new JsonFilePageModelPipeline("/data/webmagic/"), OschinaBlog.class)
+                .run();
+    }
+
+    /** @return the extracted title */
+    public String getTitle() {
+        return title;
+    }
+
+    /** @return the extracted content */
+    public String getContent() {
+        return content;
+    }
+
+    /** @return the extracted tags */
+    public List<String> getTags() {
+        return tags;
+    }
+
+    /**
+     * Prints the extracted date and title to standard output.
+     * NOTE(review): presumably invoked by the framework once the fields are populated - confirm.
+     *
+     * @param page not used by this implementation
+     */
+    @Override
+    public void afterProcess(Page page) {
+        System.out.println(date);
+        System.out.println(title);
+    }
+}
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
index 8e6602c..a7f51ad 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
@@ -6,7 +6,6 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
-import java.util.Date;
import java.util.List;
/**
@@ -24,9 +23,6 @@ public class OschinaBlog{
@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
private List tags;
- @ExtractBy("//div[class='BlogStat']/regex('\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}')")
- private Date date;
-
public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
,new JsonFilePageModelPipeline(), OschinaBlog.class).run();