xsoup test
parent
e87489d5dc
commit
b1cba78bd6
|
@ -17,6 +17,11 @@
|
|||
<artifactId>webmagic-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>xsoup</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf.saxon</groupId>
|
||||
<artifactId>Saxon-HE</artifactId>
|
||||
|
|
|
@ -1,8 +1,15 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.htmlcleaner.HtmlCleaner;
|
||||
import org.htmlcleaner.TagNode;
|
||||
import org.htmlcleaner.XPatherException;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import us.codecraft.xsoup.XPathEvaluator;
|
||||
import us.codecraft.xsoup.Xsoup;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br> Date: 13-4-21 Time: 上午10:06
|
||||
|
@ -1353,6 +1360,7 @@ public class XpathSelectorTest {
|
|||
Html html1 = new Html(html);
|
||||
Assert.assertEquals("再次吐槽easyui", html1.xpath(".//*[@class='QTitle']/h1/a").toString());
|
||||
Assert.assertNotNull(html1.$("a[href]").xpath("//@href").all());
|
||||
Selectors.xpath("/abc/").select("");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -1379,17 +1387,86 @@ public class XpathSelectorTest {
|
|||
xpath2Selector.selectList(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
XpathSelector xpathSelector = new XpathSelector("//a");
|
||||
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
xpathSelector.selectList(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
xpath2Selector.selectList(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis() - time);
|
||||
|
||||
CssSelector cssSelector = new CssSelector("a");
|
||||
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
cssSelector.selectList(html);
|
||||
}
|
||||
System.out.println("css "+(System.currentTimeMillis()-time));
|
||||
}
|
||||
|
||||
/**
 * Manual micro-benchmark comparing HtmlCleaner, Jsoup and Xsoup on the same input.
 *
 * <p>For each library it times 2000 iterations of parsing {@code html} and 2000
 * iterations of selecting every {@code a} element from an already-parsed tree,
 * printing each elapsed time (difference of {@code System.currentTimeMillis()}
 * values) to standard output. Nothing is asserted; the test is disabled via
 * {@code @Ignore} with the reason "take long time".
 *
 * <p>NOTE(review): {@code html} is declared outside this method — presumably the
 * sample page shared by the other tests in this class; confirm.
 *
 * @throws XPatherException declared for the {@code TagNode.evaluateXPath} calls
 */
@Ignore("take long time")
|
||||
@Test
|
||||
public void parserPerformanceTest() throws XPatherException {
|
||||
// Print the input size so the timings below can be put in context.
System.out.println(html.length());
|
||||
|
||||
// Parse once up front with each library; the resulting trees are reused by the query loops.
HtmlCleaner htmlCleaner = new HtmlCleaner();
|
||||
TagNode tagNode = htmlCleaner.clean(html);
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
// HtmlCleaner: time 2000 parses of the input.
long time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
htmlCleaner.clean(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
// HtmlCleaner: time 2000 evaluations of the XPath "//a" on the pre-parsed tree.
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
tagNode.evaluateXPath("//a");
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
System.out.println("=============");
|
||||
|
||||
// Jsoup: time 2000 parses of the input.
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
Jsoup.parse(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
// Jsoup: time 2000 runs of the CSS selector "a" on the pre-parsed document.
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
document.select("a");
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
System.out.println("=============");
|
||||
|
||||
// Repeats the two HtmlCleaner measurements from the top of the method.
// NOTE(review): presumably a second pass to compare against the first (e.g. after warm-up) — confirm intent.
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
htmlCleaner.clean(html);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
tagNode.evaluateXPath("//a");
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
System.out.println("=============");
|
||||
|
||||
// Xsoup: the expression is compiled once outside the loop, so only evaluation
// against the Jsoup document is timed.
XPathEvaluator compile = Xsoup.compile("//a");
|
||||
time =System.currentTimeMillis();
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
compile.evaluate(document);
|
||||
}
|
||||
System.out.println(System.currentTimeMillis()-time);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue