Inherit support of Field annotation in Model #103
parent
c8014a9ae6
commit
1fbfc92de2
|
@ -9,6 +9,7 @@ import us.codecraft.webmagic.model.formatter.BasicTypeFormatter;
|
||||||
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
||||||
import us.codecraft.webmagic.model.formatter.ObjectFormatters;
|
import us.codecraft.webmagic.model.formatter.ObjectFormatters;
|
||||||
import us.codecraft.webmagic.selector.*;
|
import us.codecraft.webmagic.selector.*;
|
||||||
|
import us.codecraft.webmagic.utils.ClassUtils;
|
||||||
import us.codecraft.webmagic.utils.ExtractorUtils;
|
import us.codecraft.webmagic.utils.ExtractorUtils;
|
||||||
|
|
||||||
import java.lang.annotation.Annotation;
|
import java.lang.annotation.Annotation;
|
||||||
|
@ -52,8 +53,9 @@ class PageModelExtractor {
|
||||||
private void init(Class clazz) {
|
private void init(Class clazz) {
|
||||||
this.clazz = clazz;
|
this.clazz = clazz;
|
||||||
initClassExtractors();
|
initClassExtractors();
|
||||||
|
clazz.getDeclaredFields()
|
||||||
fieldExtractors = new ArrayList<FieldExtractor>();
|
fieldExtractors = new ArrayList<FieldExtractor>();
|
||||||
for (Field field : clazz.getDeclaredFields()) {
|
for (Field field : ClassUtils.getFieldsIncludeSuperClass(clazz)) {
|
||||||
field.setAccessible(true);
|
field.setAccessible(true);
|
||||||
FieldExtractor fieldExtractor = getAnnotationExtractBy(clazz, field);
|
FieldExtractor fieldExtractor = getAnnotationExtractBy(clazz, field);
|
||||||
FieldExtractor fieldExtractorTmp = getAnnotationExtractCombo(clazz, field);
|
FieldExtractor fieldExtractorTmp = getAnnotationExtractCombo(clazz, field);
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
package us.codecraft.webmagic.utils;
|
||||||
|
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
 * Reflection helpers for inspecting class hierarchies.
 *
 * @author code4crafter@gmail.com
 * @since 0.5.0
 */
public abstract class ClassUtils {

    /**
     * Collects the fields declared by {@code clazz} and by each class above it
     * in its superclass chain.
     *
     * <p>Fields are gathered with {@link Class#getDeclaredFields()}, so every
     * declared field is included regardless of its access modifier. Only the
     * superclass chain is walked; fields declared on implemented interfaces
     * are not visited.
     *
     * <p>The returned set iterates in insertion order: the fields of
     * {@code clazz} itself come first, followed by those of each successive
     * superclass. The order of fields within a single class is whatever
     * {@code getDeclaredFields()} returns.
     *
     * @param clazz the class to start from; {@code null} yields an empty set
     * @return a new, mutable set holding the collected fields; never {@code null}
     */
    public static Set<Field> getFieldsIncludeSuperClass(Class<?> clazz) {
        Set<Field> fields = new LinkedHashSet<Field>();
        // Climb from the given class upwards; getSuperclass() returns null past the root.
        for (Class<?> current = clazz; current != null; current = current.getSuperclass()) {
            for (Field declared : current.getDeclaredFields()) {
                fields.add(declared);
            }
        }
        return fields;
    }

}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
*/
|
||||||
|
public class BaseRepo {
|
||||||
|
|
||||||
|
@ExtractBy("//ul[@class='pagehead-actions']/li[1]//a[@class='social-count js-social-count']/text()")
|
||||||
|
protected int star;
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||||
|
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com <br>
|
||||||
|
* @since 0.3.2
|
||||||
|
*/
|
||||||
|
@TargetUrl("https://github.com/\\w+/\\w+")
|
||||||
|
@HelpUrl({"https://github.com/\\w+\\?tab=repositories", "https://github.com/\\w+", "https://github.com/explore/*"})
|
||||||
|
public class GithubRepo extends BaseRepo{
|
||||||
|
|
||||||
|
@ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count']/text()")
|
||||||
|
private int fork;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
OOSpider.create(Site.me().setSleepTime(100)
|
||||||
|
, new ConsolePageModelPipeline(), GithubRepo.class)
|
||||||
|
.addUrl("https://github.com/code4craft").thread(10).run();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getStar() {
|
||||||
|
return star;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFork() {
|
||||||
|
return fork;
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,7 +5,6 @@ import org.junit.Test;
|
||||||
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.example.GithubRepo;
|
|
||||||
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue