diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
index bbc7217..b267d5b 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/BaseElementSelector.java
@@ -1,6 +1,7 @@
package us.codecraft.webmagic.selector;
import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
@@ -11,11 +12,24 @@ import java.util.List;
* @since 0.3.0
*/
public abstract class BaseElementSelector implements Selector, ElementSelector {
+ /**
+ * Parses HTML text into a jsoup {@link Document}.
+ *
+ * A fragment that is exactly a bare table row ({@code <tr>...</tr>}) or table cell
+ * ({@code <td>...</td>}) is wrapped in a {@code <table>} element before parsing,
+ * because jsoup cannot parse those tags directly (see the link below).
+ *
+ * @param text the HTML text to parse; may be null
+ * @return the parsed document, or null when {@code text} is null
+ */
+ private Document parse(String text) {
+ if (text == null) {
+ return null;
+ }
+
+ // Jsoup could not parse <tr></tr> or <td></td> tag directly
+ // https://stackoverflow.com/questions/63607740/jsoup-couldnt-parse-tr-tag
+ if ((text.startsWith("<tr>") && text.endsWith("</tr>"))
+ || (text.startsWith("<td>") && text.endsWith("</td>"))) {
+ // Wrap the fragment so the row/cell tags survive parsing.
+ text = "<table>" + text + "</table>";
+ }
+ return Jsoup.parse(text);
+ }
/**
* Selects from raw text: the text is parsed and the result is passed to the
* overloaded select. Returns null when text is null.
*
* The -/+ pair below replaces the direct Jsoup.parse call with the parse helper.
*/
@Override
public String select(String text) {
if (text != null) {
- return select(Jsoup.parse(text));
+ return select(parse(text));
}
return null;
}
@@ -23,7 +37,7 @@ public abstract class BaseElementSelector implements Selector, ElementSelector {
@Override
public List selectList(String text) {
if (text != null) {
- return selectList(Jsoup.parse(text));
+ return selectList(parse(text));
} else {
return new ArrayList();
}
@@ -31,14 +45,14 @@ public abstract class BaseElementSelector implements Selector, ElementSelector {
/**
* Selects a single element from raw text: the text is parsed and the result is
* passed to the overloaded selectElement. Returns null when text is null.
*
* The -/+ pair below replaces the direct Jsoup.parse call with the parse helper.
*/
public Element selectElement(String text) {
if (text != null) {
- return selectElement(Jsoup.parse(text));
+ return selectElement(parse(text));
}
return null;
}
public List selectElements(String text) {
if (text != null) {
- return selectElements(Jsoup.parse(text));
+ return selectElements(parse(text));
} else {
return new ArrayList();
}