From a6f8ed547637362b5fb55d940f47f4751bdb2b10 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 3 Jun 2017 22:07:53 +0800 Subject: [PATCH] complete formatter refactor by ObjectFormatterBuilder #586 --- .../webmagic/model/PageModelExtractor.java | 51 +---------------- .../formatter/ObjectFormatterBuilder.java | 56 +++++++++++++++++++ .../model/formatter/ObjectFormatters.java | 4 +- .../model/PageModelExtractorTest.java | 9 +-- .../test/resources/html/mock-webmagic.html | 8 +-- 5 files changed, 69 insertions(+), 59 deletions(-) create mode 100644 webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatterBuilder.java diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java index db5326b..375fe5f 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java @@ -5,9 +5,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.model.annotation.*; -import us.codecraft.webmagic.model.formatter.BasicTypeFormatter; import us.codecraft.webmagic.model.formatter.ObjectFormatter; -import us.codecraft.webmagic.model.formatter.ObjectFormatters; +import us.codecraft.webmagic.model.formatter.ObjectFormatterBuilder; import us.codecraft.webmagic.selector.*; import us.codecraft.webmagic.utils.ClassUtils; import us.codecraft.webmagic.utils.ExtractorUtils; @@ -70,58 +69,12 @@ class PageModelExtractor { fieldExtractor = fieldExtractorTmp; } if (fieldExtractor != null) { - checkFormat(field, fieldExtractor); + fieldExtractor.setObjectFormatter(new ObjectFormatterBuilder().setField(field).build()); fieldExtractors.add(fieldExtractor); } } } - private void checkFormat(Field field, FieldExtractor fieldExtractor) { - //check custom formatter - Formatter formatter = field.getAnnotation(Formatter.class); - if (formatter == null) { - return; - } - if (!formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) { - ObjectFormatter objectFormatter = initFormatter(formatter.formatter(), formatter.value()); - fieldExtractor.setObjectFormatter(objectFormatter); - return; - } - if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) { - Class fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType()); - ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(fieldClazz), formatter.value()); - if (objectFormatter == null) { - throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz); - } else { - fieldExtractor.setObjectFormatter(objectFormatter); - } - } else if (fieldExtractor.isMulti()) { - if (!List.class.isAssignableFrom(field.getType())) { - throw new IllegalStateException("Field " + field.getName() + " must be list"); - } - if (!formatter.subClazz().equals(Void.class)) { - ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(formatter.subClazz()), formatter.value()); - if (objectFormatter == null) { - throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz()); - } else { - fieldExtractor.setObjectFormatter(objectFormatter); - } - } - } - } - - private ObjectFormatter initFormatter(Class formatterClazz, String[] params) { - try { - ObjectFormatter objectFormatter = formatterClazz.newInstance(); - objectFormatter.initParam(params); - return objectFormatter; - } catch (InstantiationException e) { - throw new RuntimeException(e); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } - private FieldExtractor getAnnotationExtractByUrl(Class clazz, Field field) { FieldExtractor fieldExtractor = null; ExtractByUrl extractByUrl = field.getAnnotation(ExtractByUrl.class); diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatterBuilder.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatterBuilder.java new file mode 100644 index 0000000..4c32dfc --- /dev/null +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatterBuilder.java @@ -0,0 +1,56 @@ +package us.codecraft.webmagic.model.formatter; + +import us.codecraft.webmagic.model.annotation.Formatter; + +import java.lang.reflect.Field; +import java.util.List; + +/** + * @author code4crafter@gmail.com + * @since 0.7.0 + * Date: 2017/6/3 + */ +public class ObjectFormatterBuilder { + + private Field field; + + public ObjectFormatterBuilder setField(Field field) { + this.field = field; + return this; + } + + private ObjectFormatter initFormatterForType(Class fieldClazz, String[] params) { + if (fieldClazz.equals(String.class) || List.class.isAssignableFrom(fieldClazz)){ + return null; + } + Class formatterClass = ObjectFormatters.get(BasicTypeFormatter.detectBasicClass(fieldClazz)); + if (formatterClass == null) { + throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz); + } + return initFormatter(formatterClass, params); + } + + private ObjectFormatter initFormatter(Class formatterClazz, String[] params) { + try { + ObjectFormatter objectFormatter = formatterClazz.newInstance(); + objectFormatter.initParam(params); + return objectFormatter; + } catch (InstantiationException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public ObjectFormatter build() { + Formatter formatter = field.getAnnotation(Formatter.class); + if (formatter != null && !formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) { + return initFormatter(formatter.formatter(), formatter.value()); + } + if (formatter == null || formatter.subClazz().equals(Void.class)) { + return initFormatterForType(field.getType(), formatter != null ? formatter.value() : null); + } else { + return initFormatterForType(formatter.subClazz(), formatter.value()); + } + } +} diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatters.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatters.java index 7534e5e..42747e7 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatters.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/formatter/ObjectFormatters.java @@ -22,9 +22,9 @@ public class ObjectFormatters { try { formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter); } catch (InstantiationException e) { - e.printStackTrace(); + throw new RuntimeException(e); } catch (IllegalAccessException e) { - e.printStackTrace(); + throw new RuntimeException(e); } } diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java index 77739e5..e464a7a 100644 --- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java @@ -1,6 +1,7 @@ package us.codecraft.webmagic.model; import org.apache.commons.lang3.time.DateFormatUtils; +import org.apache.commons.lang3.time.DateUtils; import org.junit.Test; import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.Formatter; @@ -44,7 +45,7 @@ public class PageModelExtractorTest { public static class ModelStringList { - @ExtractBy("//a/@href") + @ExtractBy("//li[@class='list']/a/@href") private List links; } @@ -86,18 +87,18 @@ public class PageModelExtractorTest { @Test public void testExtractList() throws Exception { ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage()); - assertThat(modelDate.links).hasSize(8); + assertThat(modelDate.links).containsExactly("http://webmagic.io/list/1","http://webmagic.io/list/2","http://webmagic.io/list/3","http://webmagic.io/list/4"); } @Test public void testExtractIntList() throws Exception { ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage()); - assertThat(modelDate.numbers).hasSize(4); + assertThat(modelDate.numbers).containsExactly(1,2,3,4); } @Test public void testExtractDateList() throws Exception { ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage()); - assertThat(modelDate.dates).hasSize(4); + assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd")); } } diff --git a/webmagic-extension/src/test/resources/html/mock-webmagic.html b/webmagic-extension/src/test/resources/html/mock-webmagic.html index ed09a13..2347629 100644 --- a/webmagic-extension/src/test/resources/html/mock-webmagic.html +++ b/webmagic-extension/src/test/resources/html/mock-webmagic.html @@ -10,14 +10,14 @@