From 1fbfc92de2f2310d697747c7af7e5d5276a73439 Mon Sep 17 00:00:00 2001
From: "yihua.huang"
Date: Wed, 16 Apr 2014 18:13:44 +0800
Subject: [PATCH] Inherit support of Field annotation in Model #103

---
Review notes (ignored by git am; not part of the commit message):
 - PageModelExtractor.init(): dropped the added line `clazz.getDeclaredFields()`.
   It had no terminating semicolon (syntax error) and its result was unused.
   The hunk header and the diffstat below were adjusted accordingly.
 - ClassUtils.getFieldsIncludeSuperClass() is declared as Set<Field>; with a raw
   Set the `for (Field field : ...)` loop in PageModelExtractor does not compile.
 - Indentation was rebuilt with 4-space Java conventions; context lines may need
   `git apply --ignore-whitespace` if the target file is indented differently.
 - Blob ids on the `index` lines are carried over unchanged and were not recomputed.

 .../webmagic/model/PageModelExtractor.java    |  3 ++-
 .../codecraft/webmagic/utils/ClassUtils.java  | 26 +++++++++++++++
 .../us/codecraft/webmagic/model/BaseRepo.java | 12 +++++++
 .../codecraft/webmagic/model/GithubRepo.java  | 32 +++++++++++++++++++
 .../webmagic/model/GithubRepoTest.java        |  1 -
 5 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ClassUtils.java
 create mode 100644 webmagic-extension/src/test/java/us/codecraft/webmagic/model/BaseRepo.java
 create mode 100644 webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepo.java

diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
index 3f92b28..8330edf 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
@@ -9,6 +9,7 @@ import us.codecraft.webmagic.model.formatter.BasicTypeFormatter;
 import us.codecraft.webmagic.model.formatter.ObjectFormatter;
 import us.codecraft.webmagic.model.formatter.ObjectFormatters;
 import us.codecraft.webmagic.selector.*;
+import us.codecraft.webmagic.utils.ClassUtils;
 import us.codecraft.webmagic.utils.ExtractorUtils;
 
 import java.lang.annotation.Annotation;
@@ -52,8 +53,8 @@ class PageModelExtractor {
     private void init(Class clazz) {
         this.clazz = clazz;
         initClassExtractors();
         fieldExtractors = new ArrayList();
-        for (Field field : clazz.getDeclaredFields()) {
+        for (Field field : ClassUtils.getFieldsIncludeSuperClass(clazz)) {
             field.setAccessible(true);
             FieldExtractor fieldExtractor = getAnnotationExtractBy(clazz, field);
             FieldExtractor fieldExtractorTmp = getAnnotationExtractCombo(clazz, field);
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ClassUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ClassUtils.java
new file mode 100644
index 0000000..ed22a4e
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ClassUtils.java
@@ -0,0 +1,26 @@
+package us.codecraft.webmagic.utils;
+
+import java.lang.reflect.Field;
+import java.util.LinkedHashSet;
+import java.util.Set;
+
+/**
+ * @author code4crafter@gmail.com
+ * @since 0.5.0
+ */
+public abstract class ClassUtils {
+
+    public static Set<Field> getFieldsIncludeSuperClass(Class clazz) {
+        Set<Field> fields = new LinkedHashSet<Field>();
+        Class current = clazz;
+        while (current != null) {
+            Field[] currentFields = current.getDeclaredFields();
+            for (Field currentField : currentFields) {
+                fields.add(currentField);
+            }
+            current = current.getSuperclass();
+        }
+        return fields;
+    }
+
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/BaseRepo.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/BaseRepo.java
new file mode 100644
index 0000000..2d9cf94
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/BaseRepo.java
@@ -0,0 +1,12 @@
+package us.codecraft.webmagic.model;
+
+import us.codecraft.webmagic.model.annotation.ExtractBy;
+
+/**
+ * @author code4crafter@gmail.com
+ */
+public class BaseRepo {
+
+    @ExtractBy("//ul[@class='pagehead-actions']/li[1]//a[@class='social-count js-social-count']/text()")
+    protected int star;
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepo.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepo.java
new file mode 100644
index 0000000..d825a1f
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepo.java
@@ -0,0 +1,32 @@
+package us.codecraft.webmagic.model;
+
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.model.annotation.ExtractBy;
+import us.codecraft.webmagic.model.annotation.HelpUrl;
+import us.codecraft.webmagic.model.annotation.TargetUrl;
+
+/**
+ * @author code4crafter@gmail.com
+ * @since 0.3.2
+ */
+@TargetUrl("https://github.com/\\w+/\\w+")
+@HelpUrl({"https://github.com/\\w+\\?tab=repositories", "https://github.com/\\w+", "https://github.com/explore/*"})
+public class GithubRepo extends BaseRepo{
+
+    @ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count']/text()")
+    private int fork;
+
+    public static void main(String[] args) {
+        OOSpider.create(Site.me().setSleepTime(100)
+                , new ConsolePageModelPipeline(), GithubRepo.class)
+                .addUrl("https://github.com/code4craft").thread(10).run();
+    }
+
+    public int getStar() {
+        return star;
+    }
+
+    public int getFork() {
+        return fork;
+    }
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
index 85b6858..d9501a2 100644
--- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
@@ -5,7 +5,6 @@ import org.junit.Test;
 import us.codecraft.webmagic.downloader.MockGithubDownloader;
 import us.codecraft.webmagic.Site;
 import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.example.GithubRepo;
 import us.codecraft.webmagic.pipeline.PageModelPipeline;
 
 /**