diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index efa8291..1c4e745 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -17,6 +17,11 @@
webmagic-core
${project.version}
+
+ us.codecraft
+ xsoup
+ 0.0.1-SNAPSHOT
+
net.sf.saxon
Saxon-HE
diff --git a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
index b623040..6c19c8a 100644
--- a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
+++ b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
@@ -1,8 +1,15 @@
package us.codecraft.webmagic.selector;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.TagNode;
+import org.htmlcleaner.XPatherException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
+import us.codecraft.xsoup.XPathEvaluator;
+import us.codecraft.xsoup.Xsoup;
/**
* @author code4crafter@gmail.com
Date: 13-4-21 Time: 上午10:06
@@ -1353,6 +1360,7 @@ public class XpathSelectorTest {
Html html1 = new Html(html);
Assert.assertEquals("再次吐槽easyui", html1.xpath(".//*[@class='QTitle']/h1/a").toString());
Assert.assertNotNull(html1.$("a[href]").xpath("//@href").all());
+ Selectors.xpath("/abc/").select("");
}
@Test
@@ -1379,17 +1387,86 @@ public class XpathSelectorTest {
xpath2Selector.selectList(html);
}
System.out.println(System.currentTimeMillis()-time);
+
XpathSelector xpathSelector = new XpathSelector("//a");
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpathSelector.selectList(html);
}
System.out.println(System.currentTimeMillis()-time);
+
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html);
}
+ System.out.println(System.currentTimeMillis() - time);
+
+ CssSelector cssSelector = new CssSelector("a");
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 1000; i++) {
+ cssSelector.selectList(html);
+ }
+ System.out.println("css "+(System.currentTimeMillis()-time));
+ }
+
+ @Ignore("take long time")
+ @Test
+ public void parserPerformanceTest() throws XPatherException { // manual benchmark (skipped via @Ignore): prints elapsed ms of each timed loop to stdout
+ System.out.println(html.length());
+ // Parse once with each library up front; the query loops below reuse tagNode and document.
+ HtmlCleaner htmlCleaner = new HtmlCleaner();
+ TagNode tagNode = htmlCleaner.clean(html);
+ Document document = Jsoup.parse(html);
+ // HtmlCleaner, round 1: time 2000 parses of html.
+ long time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ htmlCleaner.clean(html);
+ }
System.out.println(System.currentTimeMillis()-time);
+ // HtmlCleaner, round 1: time 2000 evaluations of the XPath "//a" on the pre-parsed tagNode.
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ tagNode.evaluateXPath("//a");
+ }
+ System.out.println(System.currentTimeMillis()-time);
+
+ System.out.println("=============");
+ // Jsoup: time 2000 parses of html.
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ Jsoup.parse(html);
+ }
+ System.out.println(System.currentTimeMillis()-time);
+ // Jsoup: time 2000 runs of the CSS selector "a" on the pre-parsed document.
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ document.select("a");
+ }
+ System.out.println(System.currentTimeMillis()-time);
+
+ System.out.println("=============");
+ // HtmlCleaner, round 2: same parse loop as round 1. NOTE(review): presumably repeated to get a post-warm-up figure - confirm intent.
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ htmlCleaner.clean(html);
+ }
+ System.out.println(System.currentTimeMillis()-time);
+ // HtmlCleaner, round 2: same "//a" XPath loop as round 1.
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ tagNode.evaluateXPath("//a");
+ }
+ System.out.println(System.currentTimeMillis()-time);
+
+ System.out.println("=============");
+ // Xsoup: compile "//a" once outside the timed region, then time 2000 evaluations against the Jsoup document.
+ XPathEvaluator compile = Xsoup.compile("//a");
+ time =System.currentTimeMillis();
+ for (int i = 0; i < 2000; i++) {
+ compile.evaluate(document);
+ }
+ System.out.println(System.currentTimeMillis()-time);
+
}
}