test of ExtractByUrl #586
parent
d8bd0637a1
commit
9b77306098
|
@ -128,7 +128,7 @@ class PageModelExtractor {
|
||||||
|
|
||||||
FieldExtractor.Source source = null;
|
FieldExtractor.Source source = null;
|
||||||
switch (extractBy.source()){
|
switch (extractBy.source()){
|
||||||
case RawText:
|
case RawText:
|
||||||
source = FieldExtractor.Source.RawText;
|
source = FieldExtractor.Source.RawText;
|
||||||
break;
|
break;
|
||||||
case RawHtml:
|
case RawHtml:
|
||||||
|
@ -144,10 +144,7 @@ class PageModelExtractor {
|
||||||
|
|
||||||
fieldExtractor = new FieldExtractor(field, selector, source,
|
fieldExtractor = new FieldExtractor(field, selector, source,
|
||||||
extractBy.notNull(), List.class.isAssignableFrom(field.getType()));
|
extractBy.notNull(), List.class.isAssignableFrom(field.getType()));
|
||||||
Method setterMethod = getSetterMethod(clazz, field);
|
fieldExtractor.setSetterMethod(getSetterMethod(clazz, field));
|
||||||
if (setterMethod != null) {
|
|
||||||
fieldExtractor.setSetterMethod(setterMethod);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return fieldExtractor;
|
return fieldExtractor;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,22 +25,17 @@ public class ExtractorUtils {
|
||||||
selector = new RegexSelector(value);
|
selector = new RegexSelector(value);
|
||||||
break;
|
break;
|
||||||
case XPath:
|
case XPath:
|
||||||
selector = getXpathSelector(value);
|
selector = new XpathSelector(value);
|
||||||
break;
|
break;
|
||||||
case JsonPath:
|
case JsonPath:
|
||||||
selector = new JsonPathSelector(value);
|
selector = new JsonPathSelector(value);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
selector = getXpathSelector(value);
|
selector = new XpathSelector(value);
|
||||||
}
|
}
|
||||||
return selector;
|
return selector;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Selector getXpathSelector(String value) {
|
|
||||||
Selector selector = new XpathSelector(value);
|
|
||||||
return selector;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static List<Selector> getSelectors(ExtractBy[] extractBies) {
|
public static List<Selector> getSelectors(ExtractBy[] extractBies) {
|
||||||
List<Selector> selectors = new ArrayList<Selector>();
|
List<Selector> selectors = new ArrayList<Selector>();
|
||||||
if (extractBies == null) {
|
if (extractBies == null) {
|
||||||
|
|
|
@ -4,6 +4,7 @@ import org.apache.commons.lang3.time.DateFormatUtils;
|
||||||
import org.apache.commons.lang3.time.DateUtils;
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
|
||||||
import us.codecraft.webmagic.model.annotation.Formatter;
|
import us.codecraft.webmagic.model.annotation.Formatter;
|
||||||
import us.codecraft.webmagic.model.formatter.DateFormatter;
|
import us.codecraft.webmagic.model.formatter.DateFormatter;
|
||||||
|
|
||||||
|
@ -74,6 +75,20 @@ public class PageModelExtractorTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class ModelJsonStr {
|
||||||
|
|
||||||
|
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name",source = ExtractBy.Source.RawText)
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelUrl {
|
||||||
|
|
||||||
|
@ExtractByUrl("https://api\\.github\\.com/repos/\\w+/(\\w+)")
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testXpath() throws Exception {
|
public void testXpath() throws Exception {
|
||||||
ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage());
|
ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage());
|
||||||
|
@ -115,4 +130,16 @@ public class PageModelExtractorTest {
|
||||||
ModelCustomList modelDate = (ModelCustomList) PageModelExtractor.create(ModelCustomList.class).process(pageMocker.getMockPage());
|
ModelCustomList modelDate = (ModelCustomList) PageModelExtractor.create(ModelCustomList.class).process(pageMocker.getMockPage());
|
||||||
assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd"));
|
assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractJson() throws Exception {
|
||||||
|
ModelJsonStr modelDate = (ModelJsonStr) PageModelExtractor.create(ModelJsonStr.class).process(pageMocker.getMockJsonPage());
|
||||||
|
assertThat(modelDate.name).isEqualTo("webmagic");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractByUrl() throws Exception {
|
||||||
|
ModelUrl modelDate = (ModelUrl) PageModelExtractor.create(ModelUrl.class).process(pageMocker.getMockJsonPage());
|
||||||
|
assertThat(modelDate.name).isEqualTo("webmagic");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue