Merge pull request #370 from gyk001/master

fixed #301 修复使用注解抽取JSON数据的问题 (fix the problem with extracting JSON data via annotations)
master
Yihua Huang 2016-11-19 12:59:48 +08:00 committed by GitHub
commit 228911b58c
4 changed files with 32 additions and 8 deletions

View File

@ -15,19 +15,19 @@ import java.util.List;
*/ */
public class GithubRepoApi implements HasKey { public class GithubRepoApi implements HasKey {
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name") @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name", source = ExtractBy.Source.RawText)
private String name; private String name;
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..owner.login") @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..owner.login", source = ExtractBy.Source.RawText)
private String author; private String author;
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.language",multi = true) @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.language",multi = true, source = ExtractBy.Source.RawText)
private List<String> language; private List<String> language;
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.stargazers_count") @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.stargazers_count", source = ExtractBy.Source.RawText)
private int star; private int star;
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.homepage") @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.forks_count", source = ExtractBy.Source.RawText)
private int fork; private int fork;
@ExtractByUrl @ExtractByUrl

View File

@ -17,7 +17,7 @@ class Extractor {
protected final boolean multi; protected final boolean multi;
static enum Source {Html, Url, RawHtml} static enum Source {Html, Url, RawHtml, RawText}
public Extractor(Selector selector, Source source, boolean notNull, boolean multi) { public Extractor(Selector selector, Source source, boolean notNull, boolean multi) {
this.selector = selector; this.selector = selector;

View File

@ -179,7 +179,24 @@ class PageModelExtractor {
ExtractBy extractBy = field.getAnnotation(ExtractBy.class); ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
if (extractBy != null) { if (extractBy != null) {
Selector selector = ExtractorUtils.getSelector(extractBy); Selector selector = ExtractorUtils.getSelector(extractBy);
fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
FieldExtractor.Source source = null;
switch (extractBy.source()){
case RawText:
source = FieldExtractor.Source.RawText;
break;
case RawHtml:
source = FieldExtractor.Source.RawHtml;
break;
case SelectedHtml:
source =FieldExtractor.Source.Html;
break;
default:
source =FieldExtractor.Source.Html;
}
fieldExtractor = new FieldExtractor(field, selector, source,
extractBy.notNull(), extractBy.multi() || List.class.isAssignableFrom(field.getType())); extractBy.notNull(), extractBy.multi() || List.class.isAssignableFrom(field.getType()));
Method setterMethod = getSetterMethod(clazz, field); Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) { if (setterMethod != null) {
@ -284,6 +301,9 @@ class PageModelExtractor {
case Url: case Url:
value = fieldExtractor.getSelector().selectList(page.getUrl().toString()); value = fieldExtractor.getSelector().selectList(page.getUrl().toString());
break; break;
case RawText:
value = fieldExtractor.getSelector().selectList(page.getRawText());
break;
default: default:
value = fieldExtractor.getSelector().selectList(html); value = fieldExtractor.getSelector().selectList(html);
} }
@ -312,6 +332,9 @@ class PageModelExtractor {
case Url: case Url:
value = fieldExtractor.getSelector().select(page.getUrl().toString()); value = fieldExtractor.getSelector().select(page.getUrl().toString());
break; break;
case RawText:
value = fieldExtractor.getSelector().select(page.getRawText());
break;
default: default:
value = fieldExtractor.getSelector().select(html); value = fieldExtractor.getSelector().select(html);
} }

View File

@ -52,7 +52,8 @@ public @interface ExtractBy {
/** /**
* extract from the raw html * extract from the raw html
*/ */
RawHtml RawHtml,
RawText
} }
/** /**