diff --git a/pom.xml b/pom.xml
index 97897db..16e14cf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -171,7 +171,7 @@
net.sourceforge.htmlcleaner
htmlcleaner
- 2.24
+ 2.5
com.github.detro
diff --git a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
index d8aab6c..1f1f0a5 100644
--- a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
+++ b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
@@ -1,16 +1,11 @@
package us.codecraft.webmagic.selector;
-import net.sf.saxon.lib.NamespaceConstant;
-import net.sf.saxon.xpath.XPathEvaluator;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.DomSerializer;
-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
import javax.xml.namespace.NamespaceContext;
import javax.xml.transform.OutputKeys;
@@ -21,12 +16,19 @@ import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
+
+import org.htmlcleaner.CleanerProperties;
+import org.htmlcleaner.DomSerializer;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.TagNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import net.sf.saxon.lib.NamespaceConstant;
+import net.sf.saxon.xpath.XPathEvaluator;
/**
* 支持xpath2.0的选择器。包装了HtmlCleaner和Saxon HE。
diff --git a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
index 38aac15..32906b5 100644
--- a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
+++ b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
@@ -1,5 +1,7 @@
package us.codecraft.webmagic.selector;
+import java.util.List;
+
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
@@ -1368,15 +1370,19 @@ public class XpathSelectorTest {
public void testXPath2() {
String text = "眉山:扎实推进农业农村工作 促农持续增收
\n" +
"2013-07-31 23:29:45 来源:眉山网 责任编辑:张斯炜
";
- XpathSelector xpathSelector = new XpathSelector("//h1/text()");
- Assert.assertEquals("眉山:扎实推进农业农村工作 促农持续增收 ", xpathSelector.select(text));
+ Xpath2Selector xpathSelector = new Xpath2Selector("//h1/text()");
+ Assert.assertEquals("眉山:扎实推进农业农村工作 促农持续增收", xpathSelector.select(text));
}
@Test
public void testXpath2Selector() {
Xpath2Selector xpath2Selector = new Xpath2Selector("//a/@href");
String select = xpath2Selector.select(html);
- Assert.assertNotNull(select);
+ Assert.assertEquals("http://www.oschina.net/", select);
+
+ List<String> selectList = xpath2Selector.selectList(html);
+ Assert.assertEquals(113, selectList.size());
+ Assert.assertEquals("http://www.oschina.net/", selectList.get(0));
}
@Ignore("take long time")