add tests before refactor #586
parent
b363ee6a9d
commit
b1ef61b278
|
@ -83,14 +83,13 @@ class PageModelExtractor {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
|
if (!formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
|
||||||
ObjectFormatter objectFormatter = initFormatter(formatter.formatter());
|
ObjectFormatter objectFormatter = initFormatter(formatter.formatter(), formatter.value());
|
||||||
objectFormatter.initParam(formatter.value());
|
|
||||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
|
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
|
||||||
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
|
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
|
||||||
ObjectFormatter objectFormatter = getObjectFormatter(field, fieldClazz, formatter);
|
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(fieldClazz), formatter.value());
|
||||||
if (objectFormatter == null) {
|
if (objectFormatter == null) {
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
|
||||||
} else {
|
} else {
|
||||||
|
@ -100,30 +99,22 @@ class PageModelExtractor {
|
||||||
if (!List.class.isAssignableFrom(field.getType())) {
|
if (!List.class.isAssignableFrom(field.getType())) {
|
||||||
throw new IllegalStateException("Field " + field.getName() + " must be list");
|
throw new IllegalStateException("Field " + field.getName() + " must be list");
|
||||||
}
|
}
|
||||||
if (formatter != null) {
|
if (!formatter.subClazz().equals(Void.class)) {
|
||||||
if (!formatter.subClazz().equals(Void.class)) {
|
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(formatter.subClazz()), formatter.value());
|
||||||
ObjectFormatter objectFormatter = getObjectFormatter(field, formatter.subClazz(), formatter);
|
if (objectFormatter == null) {
|
||||||
if (objectFormatter == null) {
|
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
|
||||||
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
|
} else {
|
||||||
} else {
|
fieldExtractor.setObjectFormatter(objectFormatter);
|
||||||
fieldExtractor.setObjectFormatter(objectFormatter);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private ObjectFormatter getObjectFormatter(Field field, Class<?> fieldClazz, Formatter formatter) {
|
private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz, String[] params) {
|
||||||
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(fieldClazz));
|
|
||||||
if(formatter != null && formatter.value() != null){
|
|
||||||
objectFormatter.initParam(formatter.value());
|
|
||||||
}
|
|
||||||
return objectFormatter;
|
|
||||||
}
|
|
||||||
|
|
||||||
private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz) {
|
|
||||||
try {
|
try {
|
||||||
return formatterClazz.newInstance();
|
ObjectFormatter objectFormatter = formatterClazz.newInstance();
|
||||||
|
objectFormatter.initParam(params);
|
||||||
|
return objectFormatter;
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* Date: 2017/6/3
|
||||||
|
* Time: 下午9:07
|
||||||
|
*/
|
||||||
|
public class GithubRepoApi {
|
||||||
|
|
||||||
|
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name",source = ExtractBy.Source.RawText)
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,12 +1,11 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.SimpleHttpClient;
|
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
||||||
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
|
||||||
import us.codecraft.webmagic.example.GithubRepo;
|
import us.codecraft.webmagic.example.GithubRepo;
|
||||||
|
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
@ -27,11 +26,4 @@ public class GithubRepoTest {
|
||||||
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void test1() throws Exception {
|
|
||||||
SimpleHttpClient simpleHttpClient = new SimpleHttpClient();
|
|
||||||
GithubRepo model = simpleHttpClient.get("https://github.com/code4craft/webmagic",GithubRepo.class);
|
|
||||||
System.out.println(model);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
package us.codecraft.webmagic.model;
|
|
||||||
|
|
||||||
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
|
||||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author code4crafer@gmail.com
|
|
||||||
*/
|
|
||||||
@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
|
|
||||||
@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
|
|
||||||
public class MockModel {
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,19 +1,13 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.time.DateFormatUtils;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
import us.codecraft.webmagic.model.annotation.Formatter;
|
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||||
import us.codecraft.webmagic.model.formatter.DateFormatter;
|
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -22,6 +16,8 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||||
*/
|
*/
|
||||||
public class ModelPageProcessorTest {
|
public class ModelPageProcessorTest {
|
||||||
|
|
||||||
|
private PageMocker pageMocker = new PageMocker();
|
||||||
|
|
||||||
@TargetUrl("http://codecraft.us/foo")
|
@TargetUrl("http://codecraft.us/foo")
|
||||||
public static class ModelFoo {
|
public static class ModelFoo {
|
||||||
|
|
||||||
|
@ -38,15 +34,10 @@ public class ModelPageProcessorTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class ModelDate {
|
@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
|
||||||
|
@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
|
||||||
|
public static class MockModel {
|
||||||
|
|
||||||
@Formatter(value = "yyyyMMdd", formatter = DateFormatter.class)
|
|
||||||
@ExtractBy(value = "//div[@class='date']/text()", notNull = true)
|
|
||||||
private Date date;
|
|
||||||
|
|
||||||
public Date getDate() {
|
|
||||||
return date;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -63,26 +54,11 @@ public class ModelPageProcessorTest {
|
||||||
@Test
|
@Test
|
||||||
public void testExtractLinks() throws Exception {
|
public void testExtractLinks() throws Exception {
|
||||||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class);
|
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class);
|
||||||
Page page = getMockPage();
|
Page page = pageMocker.getMockPage();
|
||||||
modelPageProcessor.process(page);
|
modelPageProcessor.process(page);
|
||||||
assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2"));
|
assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testExtractDate() throws Exception {
|
|
||||||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelDate.class);
|
|
||||||
Page page = getMockPage();
|
|
||||||
modelPageProcessor.process(page);
|
|
||||||
ModelDate modelDate = (ModelDate) page.getResultItems().get(ModelDate.class.getCanonicalName());
|
|
||||||
assertThat(DateFormatUtils.format(modelDate.getDate(),"yyyyMMdd")).isEqualTo("20170603");
|
|
||||||
}
|
|
||||||
|
|
||||||
private Page getMockPage() throws IOException {
|
|
||||||
Page page = new Page();
|
|
||||||
page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
|
|
||||||
page.setRequest(new Request("http://webmagic.io/list/0"));
|
|
||||||
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +1,6 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Page;
|
|
||||||
import us.codecraft.webmagic.Request;
|
|
||||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
@ -18,29 +11,13 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||||
*/
|
*/
|
||||||
public class PageMapperTest {
|
public class PageMapperTest {
|
||||||
|
|
||||||
public static class GithubRepo {
|
private PageMocker pageMocker = new PageMocker();
|
||||||
|
|
||||||
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name",source = ExtractBy.Source.RawText)
|
|
||||||
private String name;
|
|
||||||
|
|
||||||
public String getName() {
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test_get() throws Exception {
|
public void test_get() throws Exception {
|
||||||
PageMapper<GithubRepo> pageMapper = new PageMapper<GithubRepo>(GithubRepo.class);
|
PageMapper<GithubRepoApi> pageMapper = new PageMapper<GithubRepoApi>(GithubRepoApi.class);
|
||||||
GithubRepo githubRepo = pageMapper.get(getMockJsonPage());
|
GithubRepoApi githubRepo = pageMapper.get(pageMocker.getMockJsonPage());
|
||||||
assertThat(githubRepo.getName()).isEqualTo("webmagic");
|
assertThat(githubRepo.getName()).isEqualTo("webmagic");
|
||||||
}
|
}
|
||||||
|
|
||||||
private Page getMockJsonPage() throws IOException {
|
|
||||||
Page page = new Page();
|
|
||||||
page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("json/mock-githubrepo.json")));
|
|
||||||
page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
|
|
||||||
page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import us.codecraft.webmagic.Page;
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* Date: 2017/6/3
|
||||||
|
* Time: 下午9:08
|
||||||
|
*/
|
||||||
|
public class PageMocker {
|
||||||
|
|
||||||
|
public Page getMockJsonPage() throws IOException {
|
||||||
|
Page page = new Page();
|
||||||
|
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json")));
|
||||||
|
page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
|
||||||
|
page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Page getMockPage() throws IOException {
|
||||||
|
Page page = new Page();
|
||||||
|
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
|
||||||
|
page.setRequest(new Request("http://webmagic.io/list/0"));
|
||||||
|
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,103 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.time.DateFormatUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
import us.codecraft.webmagic.model.annotation.Formatter;
|
||||||
|
import us.codecraft.webmagic.model.formatter.DateFormatter;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* Date: 2017/6/3
|
||||||
|
* Time: 下午9:06
|
||||||
|
*/
|
||||||
|
public class PageModelExtractorTest {
|
||||||
|
|
||||||
|
private PageMocker pageMocker = new PageMocker();
|
||||||
|
|
||||||
|
public static class ModelDateStr {
|
||||||
|
|
||||||
|
@ExtractBy(value = "//div[@class='date']/text()", notNull = true)
|
||||||
|
private String dateStr;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelDate {
|
||||||
|
|
||||||
|
@Formatter(value = "yyyyMMdd", formatter = DateFormatter.class)
|
||||||
|
@ExtractBy(value = "//div[@class='date']/text()", notNull = true)
|
||||||
|
private Date date;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelInt {
|
||||||
|
|
||||||
|
@ExtractBy(value = "//div[@class='number']/text()", notNull = true)
|
||||||
|
private int number;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelStringList {
|
||||||
|
|
||||||
|
@ExtractBy("//a/@href")
|
||||||
|
private List<String> links;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelIntList {
|
||||||
|
|
||||||
|
@Formatter(subClazz = Integer.class)
|
||||||
|
@ExtractBy("//li[@class='numbers']/text()")
|
||||||
|
private List<Integer> numbers;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ModelDateList {
|
||||||
|
|
||||||
|
@Formatter(subClazz = Date.class, value = "yyyyMMdd")
|
||||||
|
@ExtractBy("//li[@class='dates']/text()")
|
||||||
|
private List<Date> dates;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testXpath() throws Exception {
|
||||||
|
ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(modelDate.dateStr).isEqualTo("20170603");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractDate() throws Exception {
|
||||||
|
ModelDate modelDate = (ModelDate) PageModelExtractor.create(ModelDate.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(DateFormatUtils.format(modelDate.date,"yyyyMMdd")).isEqualTo("20170603");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractInt() throws Exception {
|
||||||
|
ModelInt modelDate = (ModelInt) PageModelExtractor.create(ModelInt.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(modelDate.number).isEqualTo(12);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractList() throws Exception {
|
||||||
|
ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(modelDate.links).hasSize(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractIntList() throws Exception {
|
||||||
|
ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(modelDate.numbers).hasSize(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractDateList() throws Exception {
|
||||||
|
ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage());
|
||||||
|
assertThat(modelDate.dates).hasSize(4);
|
||||||
|
}
|
||||||
|
}
|
|
@ -6,6 +6,7 @@
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="date">20170603</div>
|
<div class="date">20170603</div>
|
||||||
|
<div class="number">12</div>
|
||||||
<ul>
|
<ul>
|
||||||
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
||||||
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
||||||
|
@ -18,6 +19,17 @@
|
||||||
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
|
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
|
||||||
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
|
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
<ul>
|
||||||
|
<li class="numbers">1</li>
|
||||||
|
<li class="numbers">2</li>
|
||||||
|
<li class="numbers">3</li>
|
||||||
|
<li class="numbers">4</li>
|
||||||
|
</ul>
|
||||||
|
<ul>
|
||||||
|
<li class="dates">20170601</li>
|
||||||
|
<li class="dates">20170602</li>
|
||||||
|
<li class="dates">20170603</li>
|
||||||
|
<li class="dates">20170604</li>
|
||||||
|
</ul>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
Loading…
Reference in New Issue