diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
index b7b7900..3f92b28 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
@@ -76,9 +76,21 @@ class PageModelExtractor {
     }
 
     private void checkFormat(Field field, FieldExtractor fieldExtractor) {
+        //check custom formatter
+        Formatter formatter = field.getAnnotation(Formatter.class);
+        if (formatter != null && !formatter.formatter().equals(ObjectFormatter.class)) {
+            if (formatter != null) {
+                if (!formatter.formatter().equals(ObjectFormatter.class)) {
+                    ObjectFormatter objectFormatter = initFormatter(formatter.formatter());
+                    objectFormatter.initParam(formatter.value());
+                    fieldExtractor.setObjectFormatter(objectFormatter);
+                    return;
+                }
+            }
+        }
         if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
             Class fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
-            ObjectFormatter objectFormatter = getObjectFormatter(field, fieldClazz);
+            ObjectFormatter objectFormatter = getObjectFormatter(field, fieldClazz, formatter);
             if (objectFormatter == null) {
                 throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
             } else {
@@ -88,10 +100,9 @@ class PageModelExtractor {
             if (!List.class.isAssignableFrom(field.getType())) {
                 throw new IllegalStateException("Field " + field.getName() + " must be list");
             }
-            Formatter formatter = field.getAnnotation(Formatter.class);
             if (formatter != null) {
                 if (!formatter.subClazz().equals(Void.class)) {
-                    ObjectFormatter objectFormatter = getObjectFormatter(field, formatter.subClazz());
+                    ObjectFormatter objectFormatter = getObjectFormatter(field, formatter.subClazz(), formatter);
                     if (objectFormatter == null) {
                         throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
                     } else {
@@ -102,14 +113,7 @@ class PageModelExtractor {
         }
     }
 
-    private ObjectFormatter getObjectFormatter(Field field, Class fieldClazz) {
-        Formatter formatter = field.getAnnotation(Formatter.class);
-        if (formatter != null) {
-            if (!formatter.formatter().equals(ObjectFormatter.class)) {
-                ObjectFormatter objectFormatter = initFormatter(formatter.formatter());
-                objectFormatter.initParam(formatter.value());
-            }
-        }
+    private ObjectFormatter getObjectFormatter(Field field, Class fieldClazz, Formatter formatter) {
         return initFormatter(ObjectFormatters.get(fieldClazz));
     }
 
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java
index e8998ec..57de3f1 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java
@@ -3,11 +3,9 @@ package us.codecraft.webmagic.model.samples;
 import us.codecraft.webmagic.Site;
 import us.codecraft.webmagic.model.HasKey;
 import us.codecraft.webmagic.model.OOSpider;
-import us.codecraft.webmagic.model.annotation.ExtractBy;
-import us.codecraft.webmagic.model.annotation.ExtractByUrl;
-import us.codecraft.webmagic.model.annotation.HelpUrl;
-import us.codecraft.webmagic.model.annotation.TargetUrl;
+import us.codecraft.webmagic.model.annotation.*;
 import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
+import us.codecraft.webmagic.samples.formatter.StringTemplateFormatter;
 import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
 
 import java.util.List;
@@ -22,6 +20,7 @@ public class GithubRepo implements HasKey {
     @ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true)
     private String name;
 
+    @Formatter(value = "author%s",formatter = StringTemplateFormatter.class)
     @ExtractByUrl("https://github\\.com/(\\w+)/.*")
     private String author;
 
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/formatter/StringTemplateFormatter.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/formatter/StringTemplateFormatter.java
new file mode 100644
index 0000000..7b38125
--- /dev/null
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/formatter/StringTemplateFormatter.java
@@ -0,0 +1,26 @@
+package us.codecraft.webmagic.samples.formatter;
+
+import us.codecraft.webmagic.model.formatter.ObjectFormatter;
+
+/**
+ * @author yihua.huang@dianping.com
+ */
+public class StringTemplateFormatter implements ObjectFormatter {
+
+    private String template;
+
+    @Override
+    public String format(String raw) throws Exception {
+        return String.format(template, raw);
+    }
+
+    @Override
+    public Class clazz() {
+        return String.class;
+    }
+
+    @Override
+    public void initParam(String[] extra) {
+        template = extra[0];
+    }
+}