newselectors
parent
b1cba78bd6
commit
55d4a76ab7
|
@ -25,6 +25,12 @@
|
|||
<artifactId>commons-lang3</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Xsoup (us.codecraft.xsoup): XPath evaluation used by XsoupSelector. NOTE(review): this is a SNAPSHOT version; confirm a released version before publishing. -->
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>xsoup</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* @since 0.2.2
|
||||
*/
|
||||
public abstract class BaseElementSelector implements Selector,ElementSelector {
|
||||
|
||||
@Override
|
||||
public String select(String text) {
|
||||
return select(Jsoup.parse(text));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(String text) {
|
||||
return selectList(Jsoup.parse(text));
|
||||
}
|
||||
|
||||
}
|
|
@ -1,8 +1,6 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
|
@ -15,7 +13,7 @@ import java.util.List;
|
|||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.1.0
|
||||
*/
|
||||
public class CssSelector implements Selector {
|
||||
public class CssSelector extends BaseElementSelector {
|
||||
|
||||
private String selectorText;
|
||||
|
||||
|
@ -30,16 +28,6 @@ public class CssSelector implements Selector {
|
|||
this.attrName = attrName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String select(String text) {
|
||||
Document doc = Jsoup.parse(text);
|
||||
Elements elements = doc.select(selectorText);
|
||||
if (CollectionUtils.isEmpty(elements)) {
|
||||
return null;
|
||||
}
|
||||
return getValue(elements.get(0));
|
||||
}
|
||||
|
||||
private String getValue(Element element) {
|
||||
if (attrName == null) {
|
||||
return element.outerHtml();
|
||||
|
@ -51,9 +39,17 @@ public class CssSelector implements Selector {
|
|||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(String text) {
|
||||
public String select(Element element) {
|
||||
Elements elements = element.select(selectorText);
|
||||
if (CollectionUtils.isEmpty(elements)) {
|
||||
return null;
|
||||
}
|
||||
return getValue(elements.get(0));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(Element doc) {
|
||||
List<String> strings = new ArrayList<String>();
|
||||
Document doc = Jsoup.parse(text);
|
||||
Elements elements = doc.select(selectorText);
|
||||
if (CollectionUtils.isNotEmpty(elements)) {
|
||||
for (Element element : elements) {
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Selector(extractor) for html elements.<br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.2
|
||||
*/
|
||||
public interface ElementSelector {
|
||||
|
||||
/**
|
||||
* Extract single result in text.<br>
|
||||
* If there are more than one result, only the first will be chosen.
|
||||
*
|
||||
* @param element
|
||||
* @return result
|
||||
*/
|
||||
public String select(Element element);
|
||||
|
||||
/**
|
||||
* Extract all results in text.<br>
|
||||
*
|
||||
* @param element
|
||||
* @return results
|
||||
*/
|
||||
public List<String> selectList(Element element);
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import us.codecraft.xsoup.XPathEvaluator;
|
||||
import us.codecraft.xsoup.Xsoup;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* XPath selector based on Xsoup.<br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.2
|
||||
*/
|
||||
public class XsoupSelector extends BaseElementSelector {
|
||||
|
||||
private XPathEvaluator xPathEvaluator;
|
||||
|
||||
public XsoupSelector(String xpathStr) {
|
||||
this.xPathEvaluator = Xsoup.compile(xpathStr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String select(Element element) {
|
||||
return xPathEvaluator.evaluate(element).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> selectList(Element element) {
|
||||
return xPathEvaluator.evaluate(element).list();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue