[Bugfix] formatter property does not work when field is String #100
parent
cc9d319fd9
commit
3a79b1b64a
|
@ -76,9 +76,21 @@ class PageModelExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkFormat(Field field, FieldExtractor fieldExtractor) {
|
private void checkFormat(Field field, FieldExtractor fieldExtractor) {
|
||||||
|
//check custom formatter
|
||||||
|
Formatter formatter = field.getAnnotation(Formatter.class);
|
||||||
|
if (formatter != null && !formatter.formatter().equals(ObjectFormatter.class)) {
|
||||||
|
if (formatter != null) {
|
||||||
|
if (!formatter.formatter().equals(ObjectFormatter.class)) {
|
||||||
|
ObjectFormatter objectFormatter = initFormatter(formatter.formatter());
|
||||||
|
objectFormatter.initParam(formatter.value());
|
||||||
|
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
|
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
|
||||||
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
|
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
|
||||||
ObjectFormatter objectFormatter = getObjectFormatter(field, fieldClazz);
|
ObjectFormatter objectFormatter = getObjectFormatter(field, fieldClazz, formatter);
|
||||||
if (objectFormatter == null) {
|
if (objectFormatter == null) {
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
||||||
} else {
|
} else {
|
||||||
|
@ -88,10 +100,9 @@ class PageModelExtractor {
|
||||||
if (!List.class.isAssignableFrom(field.getType())) {
|
if (!List.class.isAssignableFrom(field.getType())) {
|
||||||
throw new IllegalStateException("Field " + field.getName() + " must be list");
|
throw new IllegalStateException("Field " + field.getName() + " must be list");
|
||||||
}
|
}
|
||||||
Formatter formatter = field.getAnnotation(Formatter.class);
|
|
||||||
if (formatter != null) {
|
if (formatter != null) {
|
||||||
if (!formatter.subClazz().equals(Void.class)) {
|
if (!formatter.subClazz().equals(Void.class)) {
|
||||||
ObjectFormatter objectFormatter = getObjectFormatter(field, formatter.subClazz());
|
ObjectFormatter objectFormatter = getObjectFormatter(field, formatter.subClazz(), formatter);
|
||||||
if (objectFormatter == null) {
|
if (objectFormatter == null) {
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
|
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
|
||||||
} else {
|
} else {
|
||||||
|
@ -102,14 +113,7 @@ class PageModelExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private ObjectFormatter getObjectFormatter(Field field, Class<?> fieldClazz) {
|
private ObjectFormatter getObjectFormatter(Field field, Class<?> fieldClazz, Formatter formatter) {
|
||||||
Formatter formatter = field.getAnnotation(Formatter.class);
|
|
||||||
if (formatter != null) {
|
|
||||||
if (!formatter.formatter().equals(ObjectFormatter.class)) {
|
|
||||||
ObjectFormatter objectFormatter = initFormatter(formatter.formatter());
|
|
||||||
objectFormatter.initParam(formatter.value());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return initFormatter(ObjectFormatters.get(fieldClazz));
|
return initFormatter(ObjectFormatters.get(fieldClazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,9 @@ package us.codecraft.webmagic.model.samples;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.model.HasKey;
|
import us.codecraft.webmagic.model.HasKey;
|
||||||
import us.codecraft.webmagic.model.OOSpider;
|
import us.codecraft.webmagic.model.OOSpider;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
import us.codecraft.webmagic.model.annotation.*;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
|
|
||||||
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
|
||||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
|
||||||
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
|
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
|
||||||
|
import us.codecraft.webmagic.samples.formatter.StringTemplateFormatter;
|
||||||
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
|
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -22,6 +20,7 @@ public class GithubRepo implements HasKey {
|
||||||
@ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true)
|
@ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true)
|
||||||
private String name;
|
private String name;
|
||||||
|
|
||||||
|
@Formatter(value = "author%s",formatter = StringTemplateFormatter.class)
|
||||||
@ExtractByUrl("https://github\\.com/(\\w+)/.*")
|
@ExtractByUrl("https://github\\.com/(\\w+)/.*")
|
||||||
private String author;
|
private String author;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
package us.codecraft.webmagic.samples.formatter;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author yihua.huang@dianping.com
|
||||||
|
*/
|
||||||
|
public class StringTemplateFormatter implements ObjectFormatter<String> {
|
||||||
|
|
||||||
|
private String template;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String format(String raw) throws Exception {
|
||||||
|
return String.format(template, raw);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class<String> clazz() {
|
||||||
|
return String.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initParam(String[] extra) {
|
||||||
|
template = extra[0];
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue