fix SourceRegion error and add some tests on it #144
parent
4e5ba02020
commit
e7668e01b8
|
@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable {
|
|||
return selectElements(xpathSelector);
|
||||
}
|
||||
|
||||
/**
 * Applies the given selector to this node.
 * A {@link BaseElementSelector} is handed to {@code selectElements} after a cast;
 * any other selector is applied through {@code selectList(selector, getSourceTexts())}.
 *
 * @param selector the selector to apply
 * @return the result produced by whichever branch handled the selector
 */
@Override
|
||||
public Selectable selectList(Selector selector) {
|
||||
if (selector instanceof BaseElementSelector) {
|
||||
return selectElements((BaseElementSelector) selector);
|
||||
}
|
||||
return selectList(selector, getSourceTexts());
|
||||
}
|
||||
|
||||
/**
 * Applies the given selector by delegating directly to {@link #selectList(Selector)};
 * this method adds no logic of its own.
 *
 * @param selector the selector to apply
 * @return whatever {@code selectList(selector)} returns
 */
@Override
|
||||
public Selectable select(Selector selector) {
|
||||
return selectList(selector);
|
||||
}
|
||||
|
||||
/**
|
||||
* select elements
|
||||
*
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
package us.codecraft.webmagic.model;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.downloader.MockGithubDownloader;
|
||||
import us.codecraft.webmagic.pipeline.PageModelPipeline;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br>
|
||||
*/
|
||||
|
@ -14,13 +15,13 @@ public class GithubRepoTest {
|
|||
|
||||
/**
 * Runs an OOSpider for {@code GithubRepo} on
 * https://github.com/code4craft/webmagic with a {@code MockGithubDownloader}
 * installed as the downloader, and asserts inside the pipeline callback that
 * the extracted star count is 86 and the fork count is 70.
 *
 * NOTE(review): this hunk appears to show both the pre-change and post-change
 * form of several statements (two OOSpider.create lines, two pairs of
 * assertions, two closing call chains) - confirm which ones are current.
 */
@Test
|
||||
public void test() {
|
||||
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
|
||||
OOSpider.create(Site.me().setSleepTime(0)
|
||||
, new PageModelPipeline<GithubRepo>() {
|
||||
@Override
|
||||
public void process(GithubRepo o, Task task) {
|
||||
Assert.assertEquals(86, o.getStar());
|
||||
Assert.assertEquals(70, o.getFork());
|
||||
assertThat(o.getStar()).isEqualTo(86);
|
||||
assertThat(o.getFork()).isEqualTo(70);
|
||||
}
|
||||
}, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
||||
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
package us.codecraft.webmagic.model;
|
||||
|
||||
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||
|
||||
/**
|
||||
* Empty model class used by ModelPageProcessorTest#testExtractLinks. It carries a
* TargetUrl and a HelpUrl annotation, each declaring a URL regex together with a
* sourceRegion XPath (li elements of class 'post' and 'list' respectively).
*
* @author code4crafter@gmail.com
|
||||
*/
|
||||
@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
|
||||
@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
|
||||
public class MockModel {
|
||||

||||
}
|
|
@ -1,5 +1,6 @@
|
|||
package us.codecraft.webmagic.model;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
|
@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
|
|||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
|
@ -40,6 +43,22 @@ public class ModelPageProcessorTest {
|
|||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
|
||||
modelPageProcessor.process(page);
|
||||
assertThat(page.getResultItems().isSkip()).isFalse();
|
||||
}
|
||||
|
||||
/**
 * Processes the mock page with a ModelPageProcessor created for MockModel and
 * asserts that the page's target requests are exactly list/1, list/2, post/1
 * and post/2, in that order.
 *
 * Presumably this pins the sourceRegion behaviour: links matching a URL pattern
 * but sitting under the other list-item class should not be collected - verify
 * against the html/mock-webmagic.html fixture.
 */
@Test
|
||||
public void testExtractLinks() throws Exception {
|
||||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class);
|
||||
Page page = getMockPage();
|
||||
modelPageProcessor.process(page);
|
||||
assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2"));
|
||||

||||
}
|
||||
|
||||
/**
 * Builds the Page used by the link-extraction test: its raw text is read from
 * the classpath resource html/mock-webmagic.html, and both its request and its
 * url are set to http://webmagic.io/list/0.
 *
 * @return the populated page
 * @throws IOException if reading the resource fails
 */
private Page getMockPage() throws IOException {
|
||||
Page page = new Page();
|
||||
// NOTE(review): IOUtils.toString(InputStream) decodes with the platform default
// charset and does not close the stream; getResourceAsStream also yields null
// when the resource is missing - consider an explicit charset and a null check.
page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
|
||||
page.setRequest(new Request("http://webmagic.io/list/0"));
|
||||
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
||||
return page;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head lang="zh">
|
||||
<meta charset="UTF-8">
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
<ul>
|
||||
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
|
||||
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
|
||||
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
|
||||
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
|
||||
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
|
||||
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
|
||||
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue