diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java index 375fe5f..8f7a625 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java @@ -128,7 +128,7 @@ class PageModelExtractor { FieldExtractor.Source source = null; switch (extractBy.source()){ - case RawText: + case RawText: source = FieldExtractor.Source.RawText; break; case RawHtml: @@ -144,10 +144,7 @@ class PageModelExtractor { fieldExtractor = new FieldExtractor(field, selector, source, extractBy.notNull(), List.class.isAssignableFrom(field.getType())); - Method setterMethod = getSetterMethod(clazz, field); - if (setterMethod != null) { - fieldExtractor.setSetterMethod(setterMethod); - } + fieldExtractor.setSetterMethod(getSetterMethod(clazz, field)); } return fieldExtractor; } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java index 54a4439..d3fc423 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java @@ -25,22 +25,17 @@ public class ExtractorUtils { selector = new RegexSelector(value); break; case XPath: - selector = getXpathSelector(value); + selector = new XpathSelector(value); break; case JsonPath: selector = new JsonPathSelector(value); break; default: - selector = getXpathSelector(value); + selector = new XpathSelector(value); } return selector; } - private static Selector getXpathSelector(String value) { - Selector selector = new XpathSelector(value); - return selector; - } - public static List getSelectors(ExtractBy[] extractBies) { List selectors = new ArrayList(); if (extractBies == null) { diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java index 87f0807..192856f 100644 --- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java @@ -4,6 +4,7 @@ import org.apache.commons.lang3.time.DateFormatUtils; import org.apache.commons.lang3.time.DateUtils; import org.junit.Test; import us.codecraft.webmagic.model.annotation.ExtractBy; +import us.codecraft.webmagic.model.annotation.ExtractByUrl; import us.codecraft.webmagic.model.annotation.Formatter; import us.codecraft.webmagic.model.formatter.DateFormatter; @@ -74,6 +75,20 @@ public class PageModelExtractorTest { } + public static class ModelJsonStr { + + @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name",source = ExtractBy.Source.RawText) + private String name; + + } + + public static class ModelUrl { + + @ExtractByUrl("https://api\\.github\\.com/repos/\\w+/(\\w+)") + private String name; + + } + @Test public void testXpath() throws Exception { ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage()); @@ -115,4 +130,16 @@ public class PageModelExtractorTest { ModelCustomList modelDate = (ModelCustomList) PageModelExtractor.create(ModelCustomList.class).process(pageMocker.getMockPage()); assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd")); } + + @Test + public void testExtractJson() throws Exception { + ModelJsonStr modelDate = (ModelJsonStr) PageModelExtractor.create(ModelJsonStr.class).process(pageMocker.getMockJsonPage()); + assertThat(modelDate.name).isEqualTo("webmagic"); + } + + @Test + public void testExtractByUrl() throws Exception { + ModelUrl modelDate = (ModelUrl) PageModelExtractor.create(ModelUrl.class).process(pageMocker.getMockJsonPage()); + assertThat(modelDate.name).isEqualTo("webmagic"); + } }