diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
index e41267b..d0dbfcd 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
@@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable {
return selectElements(xpathSelector);
}
+ /**
+ * Applies the selector to this node. A {@link BaseElementSelector} is handed to
+ * {@code selectElements}; any other selector is applied to the texts returned by
+ * {@code getSourceTexts()}.
+ *
+ * @param selector the selector to apply
+ * @return the selection result
+ */
+ @Override
+ public Selectable selectList(Selector selector) {
+ if (!(selector instanceof BaseElementSelector)) {
+ // Not element-based: fall back to selecting over the source texts.
+ return selectList(selector, getSourceTexts());
+ }
+ BaseElementSelector elementSelector = (BaseElementSelector) selector;
+ return selectElements(elementSelector);
+ }
+
+ /**
+ * Delegates to {@link #selectList(Selector)}; for this node a single select and a
+ * list select go through the same code path.
+ *
+ * @param selector the selector to apply
+ * @return whatever {@code selectList(selector)} returns
+ */
+ @Override
+ public Selectable select(Selector selector) {
+ final Selectable selected = this.selectList(selector);
+ return selected;
+ }
+
/**
* select elements
*
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
index d9501a2..1e9fd52 100644
--- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
@@ -1,12 +1,13 @@
package us.codecraft.webmagic.model;
-import junit.framework.Assert;
import org.junit.Test;
-import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.pipeline.PageModelPipeline;
+import static org.assertj.core.api.Assertions.assertThat;
+
/**
* @author code4crafter@gmail.com
*/
@@ -14,13 +15,13 @@ public class GithubRepoTest {
+ /**
+ * Runs an {@link OOSpider} for {@link GithubRepo} against the webmagic repository URL,
+ * with {@link MockGithubDownloader} installed as the downloader, and checks the star
+ * and fork counts of the extracted model inside the pipeline callback.
+ */
@Test
public void test() {
- OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
+ OOSpider.create(Site.me().setSleepTime(0)
, new PageModelPipeline<GithubRepo>() {
@Override
public void process(GithubRepo o, Task task) {
- Assert.assertEquals(86, o.getStar());
- Assert.assertEquals(70, o.getFork());
+ assertThat(o.getStar()).isEqualTo(86);
+ assertThat(o.getFork()).isEqualTo(70);
}
- }, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
+ }, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
}
}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java
new file mode 100644
index 0000000..6531053
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java
@@ -0,0 +1,13 @@
+package us.codecraft.webmagic.model;
+
+import us.codecraft.webmagic.model.annotation.HelpUrl;
+import us.codecraft.webmagic.model.annotation.TargetUrl;
+
+/**
+ * Empty page model used by tests. It declares no fields and carries only a
+ * {@code @TargetUrl} and a {@code @HelpUrl} annotation, each with a URL pattern and a
+ * {@code sourceRegion} XPath.
+ *
+ * @author code4crafter@gmail.com
+ */
+@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
+@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
+public class MockModel {
+
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java
index 74f3f6a..7733d4c 100644
--- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java
@@ -1,5 +1,6 @@
package us.codecraft.webmagic.model;
+import org.apache.commons.io.IOUtils;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
@@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.selector.PlainText;
+import java.io.IOException;
+
import static org.assertj.core.api.Assertions.assertThat;
/**
@@ -40,6 +43,22 @@ public class ModelPageProcessorTest {
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
modelPageProcessor.process(page);
assertThat(page.getResultItems().isSkip()).isFalse();
+ }
+
+ /**
+ * Processes the mock page with a processor built for {@link MockModel} and expects the
+ * page's target requests to be exactly list/1, list/2, post/1 and post/2.
+ */
+ @Test
+ public void testExtractLinks() throws Exception {
+ ModelPageProcessor processor = ModelPageProcessor.create(null, MockModel.class);
+ Page mockPage = getMockPage();
+ processor.process(mockPage);
+ Request[] expectedRequests = {
+ new Request("http://webmagic.io/list/1"),
+ new Request("http://webmagic.io/list/2"),
+ new Request("http://webmagic.io/post/1"),
+ new Request("http://webmagic.io/post/2")
+ };
+ assertThat(mockPage.getTargetRequests()).containsExactly(expectedRequests);
}
+
+ /**
+ * Builds the page used by {@link #testExtractLinks()}: its raw text is the classpath
+ * fixture {@code html/mock-webmagic.html}, and both its request and its URL are
+ * {@code http://webmagic.io/list/0}.
+ *
+ * @return the populated mock page
+ * @throws IOException if the fixture is not on the classpath or cannot be read
+ */
+ private Page getMockPage() throws IOException {
+ final String fixture = "html/mock-webmagic.html";
+ // Fully qualified so the file's import list does not have to change.
+ java.io.InputStream in = getClass().getClassLoader().getResourceAsStream(fixture);
+ if (in == null) {
+ // getResourceAsStream signals a missing resource with null; name it in the failure.
+ throw new IOException("Test resource not found on classpath: " + fixture);
+ }
+ Page page = new Page();
+ try {
+ // Decode with an explicit charset instead of the platform default
+ // (assumes the fixture is UTF-8 or plain ASCII -- TODO confirm).
+ page.setRawText(IOUtils.toString(in, "UTF-8"));
+ } finally {
+ in.close();
+ }
+ page.setRequest(new Request("http://webmagic.io/list/0"));
+ page.setUrl(new PlainText("http://webmagic.io/list/0"));
+ return page;
+ }
}
diff --git a/webmagic-extension/src/test/resouces/html/mock-webmagic.html b/webmagic-extension/src/test/resouces/html/mock-webmagic.html
new file mode 100644
index 0000000..436e1e0
--- /dev/null
+++ b/webmagic-extension/src/test/resouces/html/mock-webmagic.html
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file