Upgrade dependencies, including jedis from 2.9.3 to 3.4.1.
parent
0d73f08ef6
commit
0e01550a79
24
pom.xml
24
pom.xml
|
@ -73,17 +73,17 @@
|
|||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
<version>4.4.13</version>
|
||||
<version>4.4.14</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>30.0-android</version>
|
||||
<version>30.1-jre</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
<version>2.4.0</version>
|
||||
<version>2.5.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
|
@ -103,7 +103,7 @@
|
|||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>fastjson</artifactId>
|
||||
<version>1.2.69</version>
|
||||
<version>1.2.75</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.dreamhead</groupId>
|
||||
|
@ -125,13 +125,13 @@
|
|||
<dependency>
|
||||
<groupId>org.assertj</groupId>
|
||||
<artifactId>assertj-core</artifactId>
|
||||
<version>3.16.1</version>
|
||||
<version>3.18.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
<version>3.10</version>
|
||||
<version>3.11</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-collections</groupId>
|
||||
|
@ -141,17 +141,17 @@
|
|||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.7</version>
|
||||
<version>2.8.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.groovy</groupId>
|
||||
<artifactId>groovy-all</artifactId>
|
||||
<version>2.4.19</version>
|
||||
<version>3.0.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jruby</groupId>
|
||||
<artifactId>jruby</artifactId>
|
||||
<version>9.2.11.1</version>
|
||||
<version>9.2.14.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
|
@ -171,12 +171,12 @@
|
|||
<dependency>
|
||||
<groupId>net.sf.saxon</groupId>
|
||||
<artifactId>Saxon-HE</artifactId>
|
||||
<version>10.1</version>
|
||||
<version>10.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sourceforge.htmlcleaner</groupId>
|
||||
<artifactId>htmlcleaner</artifactId>
|
||||
<version>2.5</version>
|
||||
<version>2.24</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.detro</groupId>
|
||||
|
@ -191,7 +191,7 @@
|
|||
<dependency>
|
||||
<groupId>redis.clients</groupId>
|
||||
<artifactId>jedis</artifactId>
|
||||
<version>2.9.3</version>
|
||||
<version>3.4.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import org.jsoup.helper.StringUtil;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
* Links selector based on jsoup. Use absolute url. <br>
|
||||
*
|
||||
|
@ -23,9 +23,9 @@ public class LinksSelector extends BaseElementSelector {
|
|||
@Override
|
||||
public List<String> selectList(Element element) {
|
||||
Elements elements = element.select("a");
|
||||
List<String> links = new ArrayList<String>(elements.size());
|
||||
List<String> links = new ArrayList<>(elements.size());
|
||||
for (Element element0 : elements) {
|
||||
if (!StringUtil.isBlank(element0.baseUri())) {
|
||||
if (StringUtils.isNotBlank(element0.baseUri())) {
|
||||
links.add(element0.attr("abs:href"));
|
||||
} else {
|
||||
links.add(element0.attr("href"));
|
||||
|
|
|
@ -1,22 +1,23 @@
|
|||
package us.codecraft.webmagic.scheduler;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
|
||||
import redis.clients.jedis.Jedis;
|
||||
import redis.clients.jedis.JedisPool;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Task;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* the redis scheduler with priority
|
||||
* @author sai
|
||||
* Created by sai on 16-5-27.
|
||||
*/
|
||||
public class RedisPriorityScheduler extends RedisScheduler
|
||||
{
|
||||
public class RedisPriorityScheduler extends RedisScheduler {
|
||||
|
||||
private static final String ZSET_PREFIX = "zset_";
|
||||
|
||||
|
@ -37,62 +38,44 @@ public class RedisPriorityScheduler extends RedisScheduler
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void pushWhenNoDuplicate(Request request, Task task)
|
||||
{
|
||||
Jedis jedis = pool.getResource();
|
||||
try
|
||||
{
|
||||
if(request.getPriority() > 0)
|
||||
protected void pushWhenNoDuplicate(Request request, Task task) {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
if (request.getPriority() > 0) {
|
||||
jedis.zadd(getZsetPlusPriorityKey(task), request.getPriority(), request.getUrl());
|
||||
else if(request.getPriority() < 0)
|
||||
} else if (request.getPriority() < 0) {
|
||||
jedis.zadd(getZsetMinusPriorityKey(task), request.getPriority(), request.getUrl());
|
||||
else
|
||||
} else {
|
||||
jedis.lpush(getQueueNoPriorityKey(task), request.getUrl());
|
||||
}
|
||||
|
||||
setExtrasInItem(jedis, request, task);
|
||||
}
|
||||
finally
|
||||
{
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Request poll(Task task)
|
||||
{
|
||||
Jedis jedis = pool.getResource();
|
||||
try
|
||||
{
|
||||
public synchronized Request poll(Task task) {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
String url = getRequest(jedis, task);
|
||||
if(StringUtils.isBlank(url))
|
||||
if (StringUtils.isBlank(url)) {
|
||||
return null;
|
||||
return getExtrasInItem(jedis, url, task);
|
||||
}
|
||||
finally
|
||||
{
|
||||
pool.returnResource(jedis);
|
||||
return getExtrasInItem(jedis, url, task);
|
||||
}
|
||||
}
|
||||
|
||||
private String getRequest(Jedis jedis, Task task)
|
||||
{
|
||||
private String getRequest(Jedis jedis, Task task) {
|
||||
String url;
|
||||
Set<String> urls = jedis.zrevrange(getZsetPlusPriorityKey(task), 0, 0);
|
||||
if(urls.isEmpty())
|
||||
{
|
||||
if (urls.isEmpty()) {
|
||||
url = jedis.lpop(getQueueNoPriorityKey(task));
|
||||
if(StringUtils.isBlank(url))
|
||||
{
|
||||
if (StringUtils.isBlank(url)) {
|
||||
urls = jedis.zrevrange(getZsetMinusPriorityKey(task), 0, 0);
|
||||
if(!urls.isEmpty())
|
||||
{
|
||||
if (!urls.isEmpty()) {
|
||||
url = urls.toArray(new String[0])[0];
|
||||
jedis.zrem(getZsetMinusPriorityKey(task), url);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
url = urls.toArray(new String[0])[0];
|
||||
jedis.zrem(getZsetPlusPriorityKey(task), url);
|
||||
}
|
||||
|
@ -100,51 +83,39 @@ public class RedisPriorityScheduler extends RedisScheduler
|
|||
}
|
||||
|
||||
@Override
|
||||
public void resetDuplicateCheck(Task task)
|
||||
{
|
||||
Jedis jedis = pool.getResource();
|
||||
try
|
||||
{
|
||||
public void resetDuplicateCheck(Task task) {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
jedis.del(getSetKey(task));
|
||||
}
|
||||
finally
|
||||
{
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
|
||||
private String getZsetPlusPriorityKey(Task task)
|
||||
{
|
||||
private String getZsetPlusPriorityKey(Task task) {
|
||||
return ZSET_PREFIX + task.getUUID() + PLUS_PRIORITY_SUFFIX;
|
||||
}
|
||||
|
||||
private String getQueueNoPriorityKey(Task task)
|
||||
{
|
||||
private String getQueueNoPriorityKey(Task task) {
|
||||
return QUEUE_PREFIX + task.getUUID() + NO_PRIORITY_SUFFIX;
|
||||
}
|
||||
|
||||
private String getZsetMinusPriorityKey(Task task)
|
||||
{
|
||||
private String getZsetMinusPriorityKey(Task task) {
|
||||
return ZSET_PREFIX + task.getUUID() + MINUS_PRIORITY_SUFFIX;
|
||||
}
|
||||
|
||||
private void setExtrasInItem(Jedis jedis,Request request, Task task)
|
||||
{
|
||||
if(request.getExtras() != null)
|
||||
{
|
||||
String field = DigestUtils.shaHex(request.getUrl());
|
||||
private void setExtrasInItem(Jedis jedis,Request request, Task task) {
|
||||
if (request.getExtras() != null) {
|
||||
String field = DigestUtils.sha1Hex(request.getUrl());
|
||||
String value = JSON.toJSONString(request);
|
||||
jedis.hset(getItemKey(task), field, value);
|
||||
}
|
||||
}
|
||||
|
||||
private Request getExtrasInItem(Jedis jedis, String url, Task task)
|
||||
{
|
||||
private Request getExtrasInItem(Jedis jedis, String url, Task task) {
|
||||
String key = getItemKey(task);
|
||||
String field = DigestUtils.shaHex(url);
|
||||
String field = DigestUtils.sha1Hex(url);
|
||||
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
|
||||
if(bytes != null)
|
||||
if (bytes != null) {
|
||||
return JSON.parseObject(new String(bytes), Request.class);
|
||||
}
|
||||
return new Request(url);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
package us.codecraft.webmagic.scheduler;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
|
||||
import redis.clients.jedis.Jedis;
|
||||
import redis.clients.jedis.JedisPool;
|
||||
import redis.clients.jedis.JedisPoolConfig;
|
||||
|
@ -37,21 +39,15 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
|
|||
|
||||
@Override
|
||||
public void resetDuplicateCheck(Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
try {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
jedis.del(getSetKey(task));
|
||||
} finally {
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDuplicate(Request request, Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
try {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
return jedis.sadd(getSetKey(task), request.getUrl()) == 0;
|
||||
} finally {
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -62,7 +58,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
|
|||
try {
|
||||
jedis.rpush(getQueueKey(task), request.getUrl());
|
||||
if (checkForAdditionalInfo(request)) {
|
||||
String field = DigestUtils.shaHex(request.getUrl());
|
||||
String field = DigestUtils.sha1Hex(request.getUrl());
|
||||
String value = JSON.toJSONString(request);
|
||||
jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
|
||||
}
|
||||
|
@ -100,14 +96,13 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
|
|||
|
||||
@Override
|
||||
public synchronized Request poll(Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
try {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
String url = jedis.lpop(getQueueKey(task));
|
||||
if (url == null) {
|
||||
return null;
|
||||
}
|
||||
String key = ITEM_PREFIX + task.getUUID();
|
||||
String field = DigestUtils.shaHex(url);
|
||||
String field = DigestUtils.sha1Hex(url);
|
||||
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
|
||||
if (bytes != null) {
|
||||
Request o = JSON.parseObject(new String(bytes), Request.class);
|
||||
|
@ -115,8 +110,6 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
|
|||
}
|
||||
Request request = new Request(url);
|
||||
return request;
|
||||
} finally {
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -134,23 +127,17 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
|
|||
|
||||
@Override
|
||||
public int getLeftRequestsCount(Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
try {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
Long size = jedis.llen(getQueueKey(task));
|
||||
return size.intValue();
|
||||
} finally {
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTotalRequestsCount(Task task) {
|
||||
Jedis jedis = pool.getResource();
|
||||
try {
|
||||
try (Jedis jedis = pool.getResource()) {
|
||||
Long size = jedis.scard(getSetKey(task));
|
||||
return size.intValue();
|
||||
} finally {
|
||||
pool.returnResource(jedis);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue