From 3939074a23d7ed201f4b4360c1fd6c2c8078d075 Mon Sep 17 00:00:00 2001
From: "yihua.huang"
Date: Tue, 27 May 2014 17:53:06 +0800
Subject: [PATCH] Bugfix: nodes() only return the first element #113

---
 .../java/us/codecraft/webmagic/selector/HtmlNode.java | 8 ++++++--
 .../us/codecraft/webmagic/selector/SelectorTest.java | 9 +++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
index 3ca7e5c..3749686 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
@@ -81,8 +81,12 @@ public class HtmlNode extends AbstractSelectable {
 
     @Override
     public List nodes() {
-        ArrayList selectables = new ArrayList();
-        selectables.add(this);
+        List selectables = new ArrayList();
+        for (Element element : getElements()) {
+            List childElements = new ArrayList(1);
+            childElements.add(element);
+            selectables.add(new HtmlNode(childElements));
+        }
         return selectables;
     }
 
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java
index 249a837..16b1b48 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java
@@ -23,4 +23,13 @@ public class SelectorTest {
         assertThat(linksWithoutChain).hasSameSizeAs(linksWithChainFirstCall);
         assertThat(linksWithChainFirstCall).hasSameSizeAs(linksWithChainSecondCall);
     }
+
+    @Test
+    public void testNodes() throws Exception {
+        Html selectable = new Html(html);
+        List links = selectable.xpath("//a").nodes();
+        for (Selectable link : links) {
+            System.out.println(link.xpath("/@href"));
+        }
+    }
 }