Note: this patch was recovered from a copy in which line breaks and
indentation had been collapsed. Line boundaries were rebuilt from the hunk
headers (all -/+ line counts check out). The 4-space indentation is assumed,
and generic type parameters may have been lost in the copy; verify against
the upstream commit, or apply with `git apply --ignore-whitespace`.

diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
index 427cdf7..738d4a7 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepo.java
@@ -78,4 +78,17 @@ public class GithubRepo implements HasKey {
     public int getFork() {
         return fork;
     }
+
+    @Override
+    public String toString() {
+        return "GithubRepo{" +
+                "name='" + name + '\'' +
+                ", author='" + author + '\'' +
+                ", readme='" + readme + '\'' +
+                ", language=" + language +
+                ", star=" + star +
+                ", fork=" + fork +
+                ", url='" + url + '\'' +
+                '}';
+    }
 }
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
similarity index 69%
rename from webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
index 3970546..d8bf9fb 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageProcessor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoPageMapper.java
@@ -10,9 +10,9 @@ import us.codecraft.webmagic.processor.PageProcessor;
  * @author code4crafter@gmail.com
  * @since 0.3.2
  */
-public class GithubRepoPageProcessor implements PageProcessor {
+public class GithubRepoPageMapper implements PageProcessor {
 
-    private Site site = Site.me().setRetryTimes(3);
+    private Site site = Site.me().setRetryTimes(3).setSleepTime(0);
 
     private PageMapper githubRepoPageMapper = new PageMapper(GithubRepo.class);
 
@@ -21,7 +21,12 @@ public class GithubRepoPageProcessor implements PageProcessor {
         page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
         page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all());
         GithubRepo githubRepo = githubRepoPageMapper.get(page);
-        page.putField("repo",githubRepo);
+        if (githubRepo == null) {
+            page.setSkip(true);
+        } else {
+            page.putField("repo", githubRepo);
+        }
+
     }
 
     @Override
@@ -30,6 +35,6 @@ public class GithubRepoPageProcessor implements PageProcessor {
     }
 
     public static void main(String[] args) {
-        Spider.create(new GithubRepoPageProcessor()).addUrl("https://github.com/code4craft").thread(5).run();
+        Spider.create(new GithubRepoPageMapper()).addUrl("https://github.com/code4craft").thread(5).run();
     }
 }
\ No newline at end of file
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
index f23d936..1cc5ac3 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageMapper.java
@@ -2,6 +2,8 @@ package us.codecraft.webmagic.model;
 
 import us.codecraft.webmagic.Page;
 
+import java.util.List;
+
 /**
  * @author code4crafer@gmail.com
  * @since 0.5.2
@@ -10,11 +12,18 @@ public class PageMapper {
 
     private Class clazz;
 
+    private PageModelExtractor pageModelExtractor;
+
     public PageMapper(Class clazz) {
         this.clazz = clazz;
+        this.pageModelExtractor = PageModelExtractor.create(clazz);
     }
 
-    public T get(Page page){
-        return null;
+    public T get(Page page) {
+        return (T) pageModelExtractor.process(page);
+    }
+
+    public List getAll(Page page) {
+        return (List) pageModelExtractor.process(page);
     }
 }