diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
index 427cdf7..738d4a7 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
@@ -78,4 +78,17 @@ public class GithubRepo implements HasKey {
public int getFork() {
return fork;
}
+
+ /** Renders the repo's fields as a single-line GithubRepo{...} string. */
+ @Override
+ public String toString() {
+     StringBuilder text = new StringBuilder("GithubRepo{");
+     text.append("name='").append(name).append('\'');
+     text.append(", author='").append(author).append('\'');
+     text.append(", readme='").append(readme).append('\'');
+     text.append(", language=").append(language);
+     text.append(", star=").append(star).append(", fork=").append(fork);
+     text.append(", url='").append(url).append('\'');
+     return text.append('}').toString();
+ }
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
similarity index 69%
rename from webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
index 3970546..d8bf9fb 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
@@ -10,9 +10,9 @@ import us.codecraft.webmagic.processor.PageProcessor;
* @author code4crafter@gmail.com
* @since 0.3.2
*/
-public class GithubRepoPageProcessor implements PageProcessor {
+public class GithubRepoPageMapper implements PageProcessor {
- private Site site = Site.me().setRetryTimes(3);
+ private Site site = Site.me().setRetryTimes(3).setSleepTime(0);
private PageMapper githubRepoPageMapper = new PageMapper(GithubRepo.class);
@@ -21,7 +21,12 @@ public class GithubRepoPageProcessor implements PageProcessor {
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all());
GithubRepo githubRepo = githubRepoPageMapper.get(page);
- page.putField("repo",githubRepo);
+ if (githubRepo == null) {
+ page.setSkip(true);
+ } else {
+ page.putField("repo", githubRepo);
+ }
+
}
@Override
@@ -30,6 +35,6 @@ public class GithubRepoPageProcessor implements PageProcessor {
}
public static void main(String[] args) {
- Spider.create(new GithubRepoPageProcessor()).addUrl("https://github.com/code4craft").thread(5).run();
+ Spider.create(new GithubRepoPageMapper()).addUrl("https://github.com/code4craft").thread(5).run(); // demo entry point: seeds the spider with the code4craft page and runs it with thread(5)
}
}
\ No newline at end of file
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
index f23d936..1cc5ac3 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
@@ -2,6 +2,8 @@ package us.codecraft.webmagic.model;
import us.codecraft.webmagic.Page;
+import java.util.List;
+
/**
* @author code4crafer@gmail.com
* @since 0.5.2
@@ -10,11 +12,18 @@ public class PageMapper {
private Class clazz;
+ private PageModelExtractor pageModelExtractor;
+
public PageMapper(Class clazz) {
this.clazz = clazz;
+ this.pageModelExtractor = PageModelExtractor.create(clazz); // created once here; get() and getAll() both call process() on it
}
- public T get(Page page){
- return null;
+ /**
+  * Extracts a single model object from the page.
+  * <p>
+  * The extractor result is returned as-is when it is null or already an instance of the
+  * mapped class. If the extractor hands back a List instead (the shape getAll() casts to),
+  * the first element is returned, or null for an empty list, so that a List is never
+  * passed off as T through the unchecked cast.
+  *
+  * @param page page to extract from
+  * @return the extracted model, or null when nothing was extracted
+  */
+ public T get(Page page) {
+     Object extracted = pageModelExtractor.process(page);
+     // Only unwrap when the List is not itself the mapped type.
+     if (extracted instanceof List && !clazz.isInstance(extracted)) {
+         List models = (List) extracted;
+         return models.isEmpty() ? null : (T) models.get(0);
+     }
+     return (T) extracted;
+ }
+
+ /**
+  * Extracts the page's model objects as a list.
+  * <p>
+  * A List produced by the extractor is returned unchanged and null stays null. Any other
+  * single object is wrapped in an immutable one-element list instead of failing the
+  * List cast with a ClassCastException.
+  *
+  * @param page page to extract from
+  * @return the extracted models, or null when the extractor returned null
+  */
+ public List getAll(Page page) {
+     Object extracted = pageModelExtractor.process(page);
+     if (extracted == null || extracted instanceof List) {
+         return (List) extracted;
+     }
+     // Fully qualified so no extra import line is needed in this patch.
+     return (List) java.util.Collections.singletonList(extracted);
}
}