Route the String overloads of BaseElementSelector through a private parse(String) helper.

The helper wraps a fragment that starts with <tr> and ends with </tr>, or starts with
<td> and ends with </td>, in <table>...</table> before calling Jsoup.parse, because
Jsoup could not parse those tags directly (see the Stack Overflow link in the code
comment). It returns null for null input. select, selectList, selectElement and
selectElements now call parse(text) where they previously called Jsoup.parse(text).

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
index bbc7217..b267d5b 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
@@ -1,6 +1,7 @@
 package us.codecraft.webmagic.selector;
 
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
 import java.util.ArrayList;
@@ -11,11 +12,24 @@ import java.util.List;
  * @since 0.3.0
  */
 public abstract class BaseElementSelector implements Selector, ElementSelector {
+    private Document parse(String text) {
+        if (text == null) {
+            return null;
+        }
+
+        // Jsoup could not parse <tr></tr> or <td></td> tag directly
+        // https://stackoverflow.com/questions/63607740/jsoup-couldnt-parse-tr-tag
+        if ((text.startsWith("<tr>") && text.endsWith("</tr>"))
+                || (text.startsWith("<td>") && text.endsWith("</td>"))) {
+            text = "<table>" + text + "</table>";
+        }
+        return Jsoup.parse(text);
+    }
 
     @Override
     public String select(String text) {
         if (text != null) {
-            return select(Jsoup.parse(text));
+            return select(parse(text));
         }
         return null;
     }
@@ -23,7 +37,7 @@ public abstract class BaseElementSelector implements Selector, ElementSelector {
     @Override
     public List<String> selectList(String text) {
         if (text != null) {
-            return selectList(Jsoup.parse(text));
+            return selectList(parse(text));
         } else {
             return new ArrayList<String>();
         }
@@ -31,14 +45,14 @@ public abstract class BaseElementSelector implements Selector, ElementSelector {
 
     public Element selectElement(String text) {
         if (text != null) {
-            return selectElement(Jsoup.parse(text));
+            return selectElement(parse(text));
         }
         return null;
     }
 
     public List<Element> selectElements(String text) {
         if (text != null) {
-            return selectElements(Jsoup.parse(text));
+            return selectElements(parse(text));
         } else {
             return new ArrayList<Element>();
         }