update jsoup to 1.10.3 #608
parent
faca38d4ec
commit
eb376fca74
2
pom.xml
2
pom.xml
|
@ -146,7 +146,7 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.jsoup</groupId>
|
<groupId>org.jsoup</groupId>
|
||||||
<artifactId>jsoup</artifactId>
|
<artifactId>jsoup</artifactId>
|
||||||
<version>1.8.3</version>
|
<version>1.10.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
|
|
|
@ -3,7 +3,6 @@ package us.codecraft.webmagic.selector;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.nodes.Entities;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -20,25 +19,12 @@ public class Html extends HtmlNode {
|
||||||
|
|
||||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
private static volatile boolean INITED = false;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Disable jsoup html entity escape. It can be set just before any Html instance is created.
|
* Disable jsoup html entity escape. It can be set just before any Html instance is created.
|
||||||
|
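* @deprecated the constructors no longer consult this flag after the jsoup upgrade, so setting it appears to have no effect.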
|
||||||
*/
|
*/
|
||||||
public static boolean DISABLE_HTML_ENTITY_ESCAPE = false;
|
public static boolean DISABLE_HTML_ENTITY_ESCAPE = false;
|
||||||
|
|
||||||
/**
|
|
||||||
* Disable jsoup html entity escape. It is a hack way only for jsoup 1.7.2.
|
|
||||||
*/
|
|
||||||
private void disableJsoupHtmlEntityEscape() {
|
|
||||||
if (DISABLE_HTML_ENTITY_ESCAPE && !INITED) {
|
|
||||||
Entities.EscapeMode.base.getMap().clear();
|
|
||||||
Entities.EscapeMode.extended.getMap().clear();
|
|
||||||
Entities.EscapeMode.xhtml.getMap().clear();
|
|
||||||
INITED = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store parsed document for better performance when only one text exists.
|
* Store parsed document for better performance when only one text exists.
|
||||||
*/
|
*/
|
||||||
|
@ -46,7 +32,6 @@ public class Html extends HtmlNode {
|
||||||
|
|
||||||
public Html(String text, String url) {
|
public Html(String text, String url) {
|
||||||
try {
|
try {
|
||||||
disableJsoupHtmlEntityEscape();
|
|
||||||
this.document = Jsoup.parse(text, url);
|
this.document = Jsoup.parse(text, url);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
this.document = null;
|
this.document = null;
|
||||||
|
@ -56,7 +41,6 @@ public class Html extends HtmlNode {
|
||||||
|
|
||||||
public Html(String text) {
|
public Html(String text) {
|
||||||
try {
|
try {
|
||||||
disableJsoupHtmlEntityEscape();
|
|
||||||
this.document = Jsoup.parse(text);
|
this.document = Jsoup.parse(text);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
this.document = null;
|
this.document = null;
|
||||||
|
|
|
@ -30,7 +30,6 @@ public class HtmlTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEnableJsoupHtmlEntityEscape() throws Exception {
|
public void testEnableJsoupHtmlEntityEscape() throws Exception {
|
||||||
Html.DISABLE_HTML_ENTITY_ESCAPE = false;
|
|
||||||
Html html = new Html("aaaaaaa&b");
|
Html html = new Html("aaaaaaa&b");
|
||||||
assertThat(html.regex("(aaaaaaa&b)").toString()).isEqualTo("aaaaaaa&b");
|
assertThat(html.regex("(aaaaaaa&b)").toString()).isEqualTo("aaaaaaa&b");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue