fix SourceRegion error and add some tests on it #144
parent
4e5ba02020
commit
e7668e01b8
|
@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable {
|
||||||
return selectElements(xpathSelector);
|
return selectElements(xpathSelector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Selectable selectList(Selector selector) {
|
||||||
|
if (selector instanceof BaseElementSelector) {
|
||||||
|
return selectElements((BaseElementSelector) selector);
|
||||||
|
}
|
||||||
|
return selectList(selector, getSourceTexts());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Selectable select(Selector selector) {
|
||||||
|
return selectList(selector);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* select elements
|
* select elements
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
|
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
||||||
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com <br>
|
* @author code4crafter@gmail.com <br>
|
||||||
*/
|
*/
|
||||||
|
@ -14,13 +15,13 @@ public class GithubRepoTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test() {
|
public void test() {
|
||||||
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
|
OOSpider.create(Site.me().setSleepTime(0)
|
||||||
, new PageModelPipeline<GithubRepo>() {
|
, new PageModelPipeline<GithubRepo>() {
|
||||||
@Override
|
@Override
|
||||||
public void process(GithubRepo o, Task task) {
|
public void process(GithubRepo o, Task task) {
|
||||||
Assert.assertEquals(86, o.getStar());
|
assertThat(o.getStar()).isEqualTo(86);
|
||||||
Assert.assertEquals(70, o.getFork());
|
assertThat(o.getFork()).isEqualTo(70);
|
||||||
}
|
}
|
||||||
}, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||||
|
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
*/
|
||||||
|
@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
|
||||||
|
@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
|
||||||
|
public class MockModel {
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,6 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
|
@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -40,6 +43,22 @@ public class ModelPageProcessorTest {
|
||||||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
|
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
|
||||||
modelPageProcessor.process(page);
|
modelPageProcessor.process(page);
|
||||||
assertThat(page.getResultItems().isSkip()).isFalse();
|
assertThat(page.getResultItems().isSkip()).isFalse();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtractLinks() throws Exception {
|
||||||
|
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class);
|
||||||
|
Page page = getMockPage();
|
||||||
|
modelPageProcessor.process(page);
|
||||||
|
assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Page getMockPage() throws IOException {
|
||||||
|
Page page = new Page();
|
||||||
|
page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
|
||||||
|
page.setRequest(new Request("http://webmagic.io/list/0"));
|
||||||
|
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
||||||
|
return page;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head lang="zh">
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title></title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<ul>
|
||||||
|
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
||||||
|
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
||||||
|
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
|
||||||
|
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
|
||||||
|
</ul>
|
||||||
|
<ul>
|
||||||
|
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
|
||||||
|
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
|
||||||
|
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
|
||||||
|
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Reference in New Issue