change dependency versions into properties
change dependency versions into properties; update commons-collections from 3.x to 4.4
master
parent
692605bd75
commit
54da7af17e
78
pom.xml
78
pom.xml
|
@ -9,7 +9,31 @@
|
||||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||||
<maven.compiler.source>1.8</maven.compiler.source>
|
<maven.compiler.source>1.8</maven.compiler.source>
|
||||||
<maven.compiler.target>1.8</maven.compiler.target>
|
<maven.compiler.target>1.8</maven.compiler.target>
|
||||||
|
<assertj.version>3.18.1</assertj.version>
|
||||||
|
<commons-cli.version>1.4</commons-cli.version>
|
||||||
|
<commons-collections4.version>4.4</commons-collections4.version>
|
||||||
|
<commons-io.version>2.11.0</commons-io.version>
|
||||||
|
<commons-lang3.version>3.12.0</commons-lang3.version>
|
||||||
|
<fastjson.version>1.2.75</fastjson.version>
|
||||||
|
<groovy-all.version>3.0.10</groovy-all.version>
|
||||||
|
<guava.version>31.1-jre</guava.version>
|
||||||
|
<htmlcleaner.version>2.26</htmlcleaner.version>
|
||||||
|
<httpclient.version>4.5.13</httpclient.version>
|
||||||
|
<httpcore.version>4.4.14</httpcore.version>
|
||||||
|
<jedis.version>3.7.1</jedis.version>
|
||||||
|
<jruby.version>9.2.14.0</jruby.version>
|
||||||
|
<json-path.version>2.6.0</json-path.version>
|
||||||
|
<junit.version>4.13.2</junit.version>
|
||||||
|
<jython.version>2.7.2</jython.version>
|
||||||
|
<log4j.version>1.2.17</log4j.version>
|
||||||
|
<mockito-all.version>1.10.19</mockito-all.version>
|
||||||
|
<moco.version>1.1.0</moco.version>
|
||||||
|
<phantomjsdriver.version>1.2.0</phantomjsdriver.version>
|
||||||
|
<saxon-he.version>10.3</saxon-he.version>
|
||||||
|
<selenium-java.version>3.141.59</selenium-java.version>
|
||||||
|
<slf4j.version>1.7.36</slf4j.version>
|
||||||
<spring-version>4.0.0.RELEASE</spring-version>
|
<spring-version>4.0.0.RELEASE</spring-version>
|
||||||
|
<xsoup.version>0.3.2</xsoup.version>
|
||||||
</properties>
|
</properties>
|
||||||
<artifactId>webmagic-parent</artifactId>
|
<artifactId>webmagic-parent</artifactId>
|
||||||
<name>webmagic-parent</name>
|
<name>webmagic-parent</name>
|
||||||
|
@ -58,59 +82,59 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<version>4.13.1</version>
|
<version>${junit.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
<version>1.10.19</version>
|
<version>${mockito-all.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpclient</artifactId>
|
<artifactId>httpclient</artifactId>
|
||||||
<version>4.5.13</version>
|
<version>${httpclient.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpcore</artifactId>
|
<artifactId>httpcore</artifactId>
|
||||||
<version>4.4.14</version>
|
<version>${httpcore.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>30.1-jre</version>
|
<version>${guava.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.jayway.jsonpath</groupId>
|
<groupId>com.jayway.jsonpath</groupId>
|
||||||
<artifactId>json-path</artifactId>
|
<artifactId>json-path</artifactId>
|
||||||
<version>2.5.0</version>
|
<version>${json-path.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
<artifactId>slf4j-api</artifactId>
|
<artifactId>slf4j-api</artifactId>
|
||||||
<version>1.7.30</version>
|
<version>${slf4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
<artifactId>slf4j-log4j12</artifactId>
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
<version>1.7.30</version>
|
<version>${slf4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>xsoup</artifactId>
|
<artifactId>xsoup</artifactId>
|
||||||
<version>0.3.2</version>
|
<version>${xsoup.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.alibaba</groupId>
|
<groupId>com.alibaba</groupId>
|
||||||
<artifactId>fastjson</artifactId>
|
<artifactId>fastjson</artifactId>
|
||||||
<version>1.2.75</version>
|
<version>${fastjson.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.dreamhead</groupId>
|
<groupId>com.github.dreamhead</groupId>
|
||||||
<artifactId>moco-core</artifactId>
|
<artifactId>moco-core</artifactId>
|
||||||
<version>1.1.0</version>
|
<version>${moco.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
|
@ -122,73 +146,73 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
<groupId>log4j</groupId>
|
||||||
<artifactId>log4j</artifactId>
|
<artifactId>log4j</artifactId>
|
||||||
<version>1.2.17</version>
|
<version>${log4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.assertj</groupId>
|
<groupId>org.assertj</groupId>
|
||||||
<artifactId>assertj-core</artifactId>
|
<artifactId>assertj-core</artifactId>
|
||||||
<version>3.18.1</version>
|
<version>${assertj.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
<version>3.11</version>
|
<version>${commons-lang3.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-collections</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-collections</artifactId>
|
<artifactId>commons-collections4</artifactId>
|
||||||
<version>3.2.2</version>
|
<version>${commons-collections4.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-io</groupId>
|
<groupId>commons-io</groupId>
|
||||||
<artifactId>commons-io</artifactId>
|
<artifactId>commons-io</artifactId>
|
||||||
<version>2.8.0</version>
|
<version>${commons-io.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.codehaus.groovy</groupId>
|
<groupId>org.codehaus.groovy</groupId>
|
||||||
<artifactId>groovy-all</artifactId>
|
<artifactId>groovy-all</artifactId>
|
||||||
<version>3.0.7</version>
|
<version>${groovy-all.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.jruby</groupId>
|
<groupId>org.jruby</groupId>
|
||||||
<artifactId>jruby</artifactId>
|
<artifactId>jruby</artifactId>
|
||||||
<version>9.2.14.0</version>
|
<version>${jruby.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.python</groupId>
|
<groupId>org.python</groupId>
|
||||||
<artifactId>jython</artifactId>
|
<artifactId>jython</artifactId>
|
||||||
<version>2.7.2</version>
|
<version>${jython.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.seleniumhq.selenium</groupId>
|
<groupId>org.seleniumhq.selenium</groupId>
|
||||||
<artifactId>selenium-java</artifactId>
|
<artifactId>selenium-java</artifactId>
|
||||||
<version>3.141.59</version>
|
<version>${selenium-java.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.sf.saxon</groupId>
|
<groupId>net.sf.saxon</groupId>
|
||||||
<artifactId>Saxon-HE</artifactId>
|
<artifactId>Saxon-HE</artifactId>
|
||||||
<version>10.3</version>
|
<version>${saxon-he.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.sourceforge.htmlcleaner</groupId>
|
<groupId>net.sourceforge.htmlcleaner</groupId>
|
||||||
<artifactId>htmlcleaner</artifactId>
|
<artifactId>htmlcleaner</artifactId>
|
||||||
<version>2.9</version>
|
<version>${htmlcleaner.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.detro</groupId>
|
<groupId>com.github.detro</groupId>
|
||||||
<artifactId>phantomjsdriver</artifactId>
|
<artifactId>phantomjsdriver</artifactId>
|
||||||
<version>1.2.0</version>
|
<version>${phantomjsdriver.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-cli</groupId>
|
<groupId>commons-cli</groupId>
|
||||||
<artifactId>commons-cli</artifactId>
|
<artifactId>commons-cli</artifactId>
|
||||||
<version>1.4</version>
|
<version>${commons-cli.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>redis.clients</groupId>
|
<groupId>redis.clients</groupId>
|
||||||
<artifactId>jedis</artifactId>
|
<artifactId>jedis</artifactId>
|
||||||
<version>3.6.0</version>
|
<version>${jedis.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
|
@ -52,8 +52,8 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-collections</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-collections</artifactId>
|
<artifactId>commons-collections4</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -1,6 +1,20 @@
|
||||||
package us.codecraft.webmagic;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.concurrent.locks.Condition;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import org.apache.commons.lang3.SerializationUtils;
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -17,16 +31,6 @@ import us.codecraft.webmagic.thread.CountableThreadPool;
|
||||||
import us.codecraft.webmagic.utils.UrlUtils;
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
import us.codecraft.webmagic.utils.WMCollections;
|
import us.codecraft.webmagic.utils.WMCollections;
|
||||||
|
|
||||||
import java.io.Closeable;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.concurrent.ExecutorService;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.concurrent.locks.Condition;
|
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Entrance of a crawler.<br>
|
* Entrance of a crawler.<br>
|
||||||
* A spider contains four modules: Downloader, Scheduler, PageProcessor and
|
* A spider contains four modules: Downloader, Scheduler, PageProcessor and
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package us.codecraft.webmagic.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafer@gmail.com
|
* @author code4crafer@gmail.com
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
package us.codecraft.webmagic.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.nodes.Node;
|
import org.jsoup.nodes.Node;
|
||||||
import org.jsoup.nodes.TextNode;
|
import org.jsoup.nodes.TextNode;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CSS selector. Based on Jsoup.
|
* CSS selector. Based on Jsoup.
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
package us.codecraft.webmagic.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import com.alibaba.fastjson.JSON;
|
|
||||||
import com.jayway.jsonpath.JsonPath;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import com.alibaba.fastjson.JSON;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* JsonPath selector.<br>
|
* JsonPath selector.<br>
|
||||||
|
@ -16,15 +16,20 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class JsonPathSelector implements Selector {
|
public class JsonPathSelector implements Selector {
|
||||||
|
|
||||||
private String jsonPathStr;
|
private final String jsonPathStr;
|
||||||
|
|
||||||
private JsonPath jsonPath;
|
private final JsonPath jsonPath;
|
||||||
|
|
||||||
public JsonPathSelector(String jsonPathStr) {
|
public JsonPathSelector(String jsonPathStr) {
|
||||||
this.jsonPathStr = jsonPathStr;
|
this.jsonPathStr = jsonPathStr;
|
||||||
this.jsonPath = JsonPath.compile(this.jsonPathStr);
|
this.jsonPath = JsonPath.compile(this.jsonPathStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unused")
|
||||||
|
public String getJsonPathStr() {
|
||||||
|
return jsonPathStr;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String select(String text) {
|
public String select(String text) {
|
||||||
Object object = jsonPath.read(text);
|
Object object = jsonPath.read(text);
|
||||||
|
@ -32,8 +37,8 @@ public class JsonPathSelector implements Selector {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (object instanceof List) {
|
if (object instanceof List) {
|
||||||
List list = (List) object;
|
List<?> list = (List<?>) object;
|
||||||
if (list != null && list.size() > 0) {
|
if (list.size() > 0) {
|
||||||
return toString(list.iterator().next());
|
return toString(list.iterator().next());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -49,8 +54,9 @@ public class JsonPathSelector implements Selector {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
public List<String> selectList(String text) {
|
public List<String> selectList(String text) {
|
||||||
List<String> list = new ArrayList<String>();
|
List<String> list = new ArrayList<>();
|
||||||
Object object = jsonPath.read(text);
|
Object object = jsonPath.read(text);
|
||||||
if (object == null) {
|
if (object == null) {
|
||||||
return list;
|
return list;
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
package us.codecraft.webmagic.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import us.codecraft.xsoup.XPathEvaluator;
|
import us.codecraft.xsoup.XPathEvaluator;
|
||||||
import us.codecraft.xsoup.Xsoup;
|
import us.codecraft.xsoup.Xsoup;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* XPath selector based on Xsoup.<br>
|
* XPath selector based on Xsoup.<br>
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
package us.codecraft.webmagic.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import com.github.dreamhead.moco.HttpServer;
|
|
||||||
import com.github.dreamhead.moco.Runnable;
|
import java.io.IOException;
|
||||||
import com.github.dreamhead.moco.Runner;
|
import java.io.UnsupportedEncodingException;
|
||||||
import org.apache.commons.collections.map.HashedMap;
|
import java.util.Map;
|
||||||
|
import org.apache.commons.collections4.map.HashedMap;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
import org.apache.http.client.methods.HttpUriRequest;
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
|
@ -11,6 +12,9 @@ import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
import org.apache.http.impl.client.HttpClients;
|
import org.apache.http.impl.client.HttpClients;
|
||||||
import org.apache.http.util.EntityUtils;
|
import org.apache.http.util.EntityUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import com.github.dreamhead.moco.HttpServer;
|
||||||
|
import com.github.dreamhead.moco.Runnable;
|
||||||
|
import com.github.dreamhead.moco.Runner;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
@ -21,12 +25,19 @@ import us.codecraft.webmagic.proxy.SimpleProxyProvider;
|
||||||
import us.codecraft.webmagic.selector.Html;
|
import us.codecraft.webmagic.selector.Html;
|
||||||
import us.codecraft.webmagic.utils.CharsetUtils;
|
import us.codecraft.webmagic.utils.CharsetUtils;
|
||||||
import us.codecraft.webmagic.utils.HttpConstant;
|
import us.codecraft.webmagic.utils.HttpConstant;
|
||||||
|
import static com.github.dreamhead.moco.Moco.and;
|
||||||
import java.io.IOException;
|
import static com.github.dreamhead.moco.Moco.by;
|
||||||
import java.io.UnsupportedEncodingException;
|
import static com.github.dreamhead.moco.Moco.cookie;
|
||||||
import java.util.Map;
|
import static com.github.dreamhead.moco.Moco.eq;
|
||||||
|
import static com.github.dreamhead.moco.Moco.form;
|
||||||
import static com.github.dreamhead.moco.Moco.*;
|
import static com.github.dreamhead.moco.Moco.header;
|
||||||
|
import static com.github.dreamhead.moco.Moco.httpServer;
|
||||||
|
import static com.github.dreamhead.moco.Moco.method;
|
||||||
|
import static com.github.dreamhead.moco.Moco.not;
|
||||||
|
import static com.github.dreamhead.moco.Moco.query;
|
||||||
|
import static com.github.dreamhead.moco.Moco.text;
|
||||||
|
import static com.github.dreamhead.moco.Moco.uri;
|
||||||
|
import static com.github.dreamhead.moco.Moco.with;
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
package us.codecraft.webmagic.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Task;
|
import us.codecraft.webmagic.Task;
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
|
@ -19,7 +21,7 @@ public class MockGithubDownloader implements Downloader {
|
||||||
Page page = new Page();
|
Page page = new Page();
|
||||||
InputStream resourceAsStream = this.getClass().getResourceAsStream("/html/mock-github.html");
|
InputStream resourceAsStream = this.getClass().getResourceAsStream("/html/mock-github.html");
|
||||||
try {
|
try {
|
||||||
page.setRawText(IOUtils.toString(resourceAsStream));
|
page.setRawText(IOUtils.toString(resourceAsStream, Charset.defaultCharset()));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
package us.codecraft.webmagic.model;
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
|
@ -16,7 +18,7 @@ public class PageMocker {
|
||||||
|
|
||||||
public Page getMockJsonPage() throws IOException {
|
public Page getMockJsonPage() throws IOException {
|
||||||
Page page = new Page();
|
Page page = new Page();
|
||||||
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json")));
|
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json"), Charset.defaultCharset()));
|
||||||
page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
|
page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
|
||||||
page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
|
page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
|
||||||
return page;
|
return page;
|
||||||
|
@ -24,7 +26,7 @@ public class PageMocker {
|
||||||
|
|
||||||
public Page getMockPage() throws IOException {
|
public Page getMockPage() throws IOException {
|
||||||
Page page = new Page();
|
Page page = new Page();
|
||||||
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
|
page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html"), Charset.defaultCharset()));
|
||||||
page.setRequest(new Request("http://webmagic.io/list/0"));
|
page.setRequest(new Request("http://webmagic.io/list/0"));
|
||||||
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
page.setUrl(new PlainText("http://webmagic.io/list/0"));
|
||||||
return page;
|
return page;
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
package us.codecraft.webmagic.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Spider;
|
import us.codecraft.webmagic.Spider;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.webmagic.selector.JsonPathSelector;
|
import us.codecraft.webmagic.selector.JsonPathSelector;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
* @since 0.5.0
|
* @since 0.5.0
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
package us.codecraft.webmagic.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.Spider;
|
import us.codecraft.webmagic.Spider;
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
package us.codecraft.webmagic.scripts;
|
package us.codecraft.webmagic.scripts;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
|
import javax.script.ScriptContext;
|
||||||
|
import javax.script.ScriptEngine;
|
||||||
|
import javax.script.ScriptException;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.jruby.RubyHash;
|
import org.jruby.RubyHash;
|
||||||
import org.python.core.PyDictionary;
|
import org.python.core.PyDictionary;
|
||||||
|
@ -7,14 +16,6 @@ import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import javax.script.ScriptContext;
|
|
||||||
import javax.script.ScriptEngine;
|
|
||||||
import javax.script.ScriptException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
* @since 0.4.1
|
* @since 0.4.1
|
||||||
|
@ -39,7 +40,7 @@ public class ScriptProcessor implements PageProcessor {
|
||||||
enginePool = new ScriptEnginePool(language, threadNum);
|
enginePool = new ScriptEnginePool(language, threadNum);
|
||||||
InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile());
|
InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile());
|
||||||
try {
|
try {
|
||||||
defines = IOUtils.toString(resourceAsStream);
|
defines = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new IllegalArgumentException(e);
|
throw new IllegalArgumentException(e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
package us.codecraft.webmagic.scripts;
|
package us.codecraft.webmagic.scripts;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author code4crafter@gmail.com
|
* @author code4crafter@gmail.com
|
||||||
|
@ -35,7 +37,7 @@ public class ScriptProcessorBuilder {
|
||||||
public ScriptProcessorBuilder scriptFromFile(String fileName) {
|
public ScriptProcessorBuilder scriptFromFile(String fileName) {
|
||||||
try {
|
try {
|
||||||
InputStream resourceAsStream = new FileInputStream(fileName);
|
InputStream resourceAsStream = new FileInputStream(fileName);
|
||||||
this.script = IOUtils.toString(resourceAsStream);
|
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
//wrap IOException because I prefer a runtime exception...
|
//wrap IOException because I prefer a runtime exception...
|
||||||
throw new IllegalArgumentException(e);
|
throw new IllegalArgumentException(e);
|
||||||
|
@ -46,7 +48,7 @@ public class ScriptProcessorBuilder {
|
||||||
public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
|
public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
|
||||||
try {
|
try {
|
||||||
InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
|
InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
|
||||||
this.script = IOUtils.toString(resourceAsStream);
|
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
//wrap IOException because I prefer a runtime exception...
|
//wrap IOException because I prefer a runtime exception...
|
||||||
throw new IllegalArgumentException(e);
|
throw new IllegalArgumentException(e);
|
||||||
|
|
Loading…
Reference in New Issue