complete formatter refactor by ObjectFormatterBuilder #586
parent
b1ef61b278
commit
a6f8ed5476
|
@ -5,9 +5,8 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.model.annotation.*;
|
import us.codecraft.webmagic.model.annotation.*;
|
||||||
import us.codecraft.webmagic.model.formatter.BasicTypeFormatter;
|
|
||||||
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
import us.codecraft.webmagic.model.formatter.ObjectFormatter;
|
||||||
import us.codecraft.webmagic.model.formatter.ObjectFormatters;
|
import us.codecraft.webmagic.model.formatter.ObjectFormatterBuilder;
|
||||||
import us.codecraft.webmagic.selector.*;
|
import us.codecraft.webmagic.selector.*;
|
||||||
import us.codecraft.webmagic.utils.ClassUtils;
|
import us.codecraft.webmagic.utils.ClassUtils;
|
||||||
import us.codecraft.webmagic.utils.ExtractorUtils;
|
import us.codecraft.webmagic.utils.ExtractorUtils;
|
||||||
|
@ -70,58 +69,12 @@ class PageModelExtractor {
|
||||||
fieldExtractor = fieldExtractorTmp;
|
fieldExtractor = fieldExtractorTmp;
|
||||||
}
|
}
|
||||||
if (fieldExtractor != null) {
|
if (fieldExtractor != null) {
|
||||||
checkFormat(field, fieldExtractor);
|
fieldExtractor.setObjectFormatter(new ObjectFormatterBuilder().setField(field).build());
|
||||||
fieldExtractors.add(fieldExtractor);
|
fieldExtractors.add(fieldExtractor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkFormat(Field field, FieldExtractor fieldExtractor) {
|
|
||||||
//check custom formatter
|
|
||||||
Formatter formatter = field.getAnnotation(Formatter.class);
|
|
||||||
if (formatter == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
|
|
||||||
ObjectFormatter objectFormatter = initFormatter(formatter.formatter(), formatter.value());
|
|
||||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
|
|
||||||
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
|
|
||||||
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(fieldClazz), formatter.value());
|
|
||||||
if (objectFormatter == null) {
|
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
|
||||||
} else {
|
|
||||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
|
||||||
}
|
|
||||||
} else if (fieldExtractor.isMulti()) {
|
|
||||||
if (!List.class.isAssignableFrom(field.getType())) {
|
|
||||||
throw new IllegalStateException("Field " + field.getName() + " must be list");
|
|
||||||
}
|
|
||||||
if (!formatter.subClazz().equals(Void.class)) {
|
|
||||||
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(formatter.subClazz()), formatter.value());
|
|
||||||
if (objectFormatter == null) {
|
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
|
|
||||||
} else {
|
|
||||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz, String[] params) {
|
|
||||||
try {
|
|
||||||
ObjectFormatter objectFormatter = formatterClazz.newInstance();
|
|
||||||
objectFormatter.initParam(params);
|
|
||||||
return objectFormatter;
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private FieldExtractor getAnnotationExtractByUrl(Class clazz, Field field) {
|
private FieldExtractor getAnnotationExtractByUrl(Class clazz, Field field) {
|
||||||
FieldExtractor fieldExtractor = null;
|
FieldExtractor fieldExtractor = null;
|
||||||
ExtractByUrl extractByUrl = field.getAnnotation(ExtractByUrl.class);
|
ExtractByUrl extractByUrl = field.getAnnotation(ExtractByUrl.class);
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
package us.codecraft.webmagic.model.formatter;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.model.annotation.Formatter;
|
||||||
|
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* @since 0.7.0
|
||||||
|
* Date: 2017/6/3
|
||||||
|
*/
|
||||||
|
public class ObjectFormatterBuilder {
|
||||||
|
|
||||||
|
private Field field;
|
||||||
|
|
||||||
|
public ObjectFormatterBuilder setField(Field field) {
|
||||||
|
this.field = field;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ObjectFormatter initFormatterForType(Class<?> fieldClazz, String[] params) {
|
||||||
|
if (fieldClazz.equals(String.class) || List.class.isAssignableFrom(fieldClazz)){
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Class<? extends ObjectFormatter> formatterClass = ObjectFormatters.get(BasicTypeFormatter.detectBasicClass(fieldClazz));
|
||||||
|
if (formatterClass == null) {
|
||||||
|
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
||||||
|
}
|
||||||
|
return initFormatter(formatterClass, params);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz, String[] params) {
|
||||||
|
try {
|
||||||
|
ObjectFormatter objectFormatter = formatterClazz.newInstance();
|
||||||
|
objectFormatter.initParam(params);
|
||||||
|
return objectFormatter;
|
||||||
|
} catch (InstantiationException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ObjectFormatter build() {
|
||||||
|
Formatter formatter = field.getAnnotation(Formatter.class);
|
||||||
|
if (formatter != null && !formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
|
||||||
|
return initFormatter(formatter.formatter(), formatter.value());
|
||||||
|
}
|
||||||
|
if (formatter == null || formatter.subClazz().equals(Void.class)) {
|
||||||
|
return initFormatterForType(field.getType(), formatter != null ? formatter.value() : null);
|
||||||
|
} else {
|
||||||
|
return initFormatterForType(formatter.subClazz(), formatter.value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,9 +22,9 @@ public class ObjectFormatters {
|
||||||
try {
|
try {
|
||||||
formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter);
|
formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter);
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
e.printStackTrace();
|
throw new RuntimeException(e);
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
e.printStackTrace();
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
import org.apache.commons.lang3.time.DateFormatUtils;
|
import org.apache.commons.lang3.time.DateFormatUtils;
|
||||||
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
import us.codecraft.webmagic.model.annotation.Formatter;
|
import us.codecraft.webmagic.model.annotation.Formatter;
|
||||||
|
@ -44,7 +45,7 @@ public class PageModelExtractorTest {
|
||||||
|
|
||||||
public static class ModelStringList {
|
public static class ModelStringList {
|
||||||
|
|
||||||
@ExtractBy("//a/@href")
|
@ExtractBy("//li[@class='list']/a/@href")
|
||||||
private List<String> links;
|
private List<String> links;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -86,18 +87,18 @@ public class PageModelExtractorTest {
|
||||||
@Test
|
@Test
|
||||||
public void testExtractList() throws Exception {
|
public void testExtractList() throws Exception {
|
||||||
ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage());
|
ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage());
|
||||||
assertThat(modelDate.links).hasSize(8);
|
assertThat(modelDate.links).containsExactly("http://webmagic.io/list/1","http://webmagic.io/list/2","http://webmagic.io/list/3","http://webmagic.io/list/4");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractIntList() throws Exception {
|
public void testExtractIntList() throws Exception {
|
||||||
ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage());
|
ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage());
|
||||||
assertThat(modelDate.numbers).hasSize(4);
|
assertThat(modelDate.numbers).containsExactly(1,2,3,4);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractDateList() throws Exception {
|
public void testExtractDateList() throws Exception {
|
||||||
ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage());
|
ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage());
|
||||||
assertThat(modelDate.dates).hasSize(4);
|
assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,14 +10,14 @@
|
||||||
<ul>
|
<ul>
|
||||||
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
||||||
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
||||||
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/3"></a></li>
|
||||||
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/4"></a></li>
|
||||||
</ul>
|
</ul>
|
||||||
<ul>
|
<ul>
|
||||||
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
|
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
|
||||||
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
|
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
|
||||||
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
|
<li class="post"><a href="http://webmagic.io/post/3"></a></li>
|
||||||
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
|
<li class="post"><a href="http://webmagic.io/post/4"></a></li>
|
||||||
</ul>
|
</ul>
|
||||||
<ul>
|
<ul>
|
||||||
<li class="numbers">1</li>
|
<li class="numbers">1</li>
|
||||||
|
|
Loading…
Reference in New Issue