diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index a234a4f..8d2c070 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -10,6 +10,12 @@
webmagic-extension
+
+ org.projectlombok
+ lombok
+ 1.18.32
+ provided
+
redis.clients
jedis
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/Extractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/Extractor.java
index f1d2f84..d64adff 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/Extractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/Extractor.java
@@ -1,5 +1,7 @@
package us.codecraft.webmagic.model;
+import lombok.Getter;
+import lombok.Setter;
import us.codecraft.webmagic.selector.Selector;
/**
@@ -7,17 +9,19 @@ import us.codecraft.webmagic.selector.Selector;
* @author code4crafter@gmail.com
* @since 0.2.0
*/
-class Extractor {
+public class Extractor {
+ @Getter @Setter
protected Selector selector;
+ @Getter
protected final Source source;
protected final boolean notNull;
protected final boolean multi;
- static enum Source {Html, Url, RawHtml, RawText}
+ public static enum Source {Html, Url, RawHtml, RawText}
public Extractor(Selector selector, Source source, boolean notNull, boolean multi) {
this.selector = selector;
@@ -26,23 +30,11 @@ class Extractor {
this.multi = multi;
}
- Selector getSelector() {
- return selector;
- }
-
- Source getSource() {
- return source;
- }
-
- boolean isNotNull() {
+ public boolean isNotNull() {
return notNull;
}
- boolean isMulti() {
+ public boolean isMulti() {
return multi;
}
-
- void setSelector(Selector selector) {
- this.selector = selector;
- }
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java
index a2cba13..a49ea77 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java
@@ -6,53 +6,27 @@ import us.codecraft.webmagic.selector.Selector;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
+import lombok.Getter;
+import lombok.Setter;
+
/**
* Wrapper of field and extractor.
* @author code4crafter@gmail.com
* @since 0.2.0
*/
-class FieldExtractor extends Extractor {
+public class FieldExtractor extends Extractor {
+ @Getter
private final Field field;
+ @Getter @Setter
private Method setterMethod;
+ @Getter @Setter
private ObjectFormatter objectFormatter;
public FieldExtractor(Field field, Selector selector, Source source, boolean notNull, boolean multi) {
super(selector, source, notNull, multi);
this.field = field;
}
-
- Field getField() {
- return field;
- }
-
- Selector getSelector() {
- return selector;
- }
-
- Source getSource() {
- return source;
- }
-
- void setSetterMethod(Method setterMethod) {
- this.setterMethod = setterMethod;
- }
-
- Method getSetterMethod() {
- return setterMethod;
- }
-
- boolean isNotNull() {
- return notNull;
- }
-
- ObjectFormatter getObjectFormatter() {
- return objectFormatter;
- }
-
- void setObjectFormatter(ObjectFormatter objectFormatter) {
- this.objectFormatter = objectFormatter;
- }
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
index d8947de..de71717 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
@@ -3,17 +3,21 @@ package us.codecraft.webmagic.model;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+
+import lombok.Getter;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.annotation.*;
-import us.codecraft.webmagic.model.formatter.ObjectFormatter;
+import us.codecraft.webmagic.model.fields.PageField;
import us.codecraft.webmagic.model.formatter.ObjectFormatterBuilder;
+import us.codecraft.webmagic.model.selections.MultipleSelection;
+import us.codecraft.webmagic.model.selections.Selection;
+import us.codecraft.webmagic.model.selections.SingleSelection;
import us.codecraft.webmagic.selector.*;
import us.codecraft.webmagic.utils.ClassUtils;
import us.codecraft.webmagic.utils.ExtractorUtils;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
-import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
@@ -29,14 +33,19 @@ import static us.codecraft.webmagic.model.annotation.ExtractBy.Source.RawText;
*/
class PageModelExtractor {
+ @Getter
private List targetUrlPatterns = new ArrayList();
+ @Getter
private Selector targetUrlRegionSelector;
+ @Getter
private List helpUrlPatterns = new ArrayList();
+ @Getter
private Selector helpUrlRegionSelector;
+ @Getter
private Class clazz;
private List fieldExtractors;
@@ -233,145 +242,16 @@ class PageModelExtractor {
try {
o = clazz.newInstance();
for (FieldExtractor fieldExtractor : fieldExtractors) {
- if (fieldExtractor.isMulti()) {
- List value=getMultiValueFromSource(page, fieldExtractor, html, isRaw);
- if ((value == null || value.size() == 0) && fieldExtractor.isNotNull()) {
- return null;
- }
- if (fieldExtractor.getObjectFormatter() != null) {
- List