diff --git a/pom.xml b/pom.xml
index 96b27fa..0e49cd3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -89,7 +89,7 @@
us.codecraft
xsoup
- 0.1.0
+ 0.2.0
net.sf.saxon
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcessor.java
index 1827e06..1dc3352 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcessor.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/BaiduBaikePageProcessor.java
@@ -20,7 +20,7 @@ public class BaiduBaikePageProcessor implements PageProcessor {
@Override
public void process(Page page) {
- page.putField("name", page.getHtml().$("h1.title div.lemmaTitleH1","text").toString());
+ page.putField("name", page.getHtml().css("h1.title div.lemmaTitleH1","text").toString()); // css(...) replaces the $(...) call; NOTE(review): "text" presumably resolves through CssSelector to the element's direct text nodes only - confirm
page.putField("description", page.getHtml().xpath("//div[@id='lemmaContent-0']//div[@class='para']/allText()"));
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
index 9c7032c..185db74 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
@@ -2,6 +2,8 @@ package us.codecraft.webmagic.selector;
import org.apache.commons.collections.CollectionUtils;
import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import java.util.ArrayList;
@@ -33,11 +35,26 @@ public class CssSelector extends BaseElementSelector {
return element.outerHtml();
} else if ("innerHtml".equalsIgnoreCase(attrName)) {
return element.html();
+ } else if ("text".equalsIgnoreCase(attrName)) {
+ return getText(element);
+ } else if ("allText".equalsIgnoreCase(attrName)) {
+ return element.text();
} else {
return element.attr(attrName);
}
}
+ protected String getText(Element element) {
+     // Concatenates the text of the element's TextNode children in order; any other child node is skipped.
+     StringBuilder ownText = new StringBuilder();
+     for (Node child : element.childNodes()) {
+         if (!(child instanceof TextNode)) {
+             continue;
+         }
+         ownText.append(((TextNode) child).text());
+     }
+     return ownText.toString();
+ }
+
@Override
public String select(Element element) {
Elements elements = element.select(selectorText);