diff --git a/pom.xml b/pom.xml
index 08250fd..81d0d6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -73,17 +73,17 @@
org.apache.httpcomponents
httpcore
- 4.4.13
+ 4.4.14
com.google.guava
guava
- 30.0-android
+ 30.1-jre
com.jayway.jsonpath
json-path
- 2.4.0
+ 2.5.0
org.slf4j
@@ -103,7 +103,7 @@
com.alibaba
fastjson
- 1.2.69
+ 1.2.75
com.github.dreamhead
@@ -125,13 +125,13 @@
org.assertj
assertj-core
- 3.16.1
+ 3.18.1
test
org.apache.commons
commons-lang3
- 3.10
+ 3.11
commons-collections
@@ -139,19 +139,19 @@
3.2.2
- commons-io
- commons-io
- 2.7
-
+ commons-io
+ commons-io
+ 2.8.0
+
org.codehaus.groovy
groovy-all
- 2.4.19
+ 3.0.7
org.jruby
jruby
- 9.2.11.1
+ 9.2.14.0
org.jsoup
@@ -171,12 +171,12 @@
net.sf.saxon
Saxon-HE
- 10.1
+ 10.3
net.sourceforge.htmlcleaner
htmlcleaner
- 2.5
+ 2.24
com.github.detro
@@ -191,7 +191,7 @@
redis.clients
jedis
- 2.9.3
+ 3.4.1
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/LinksSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/LinksSelector.java
index 5296a74..2dafe8e 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/LinksSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/LinksSelector.java
@@ -1,12 +1,12 @@
package us.codecraft.webmagic.selector;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
/**
* Links selector based on jsoup. Use absolute url.
*
@@ -23,9 +23,9 @@ public class LinksSelector extends BaseElementSelector {
@Override
public List selectList(Element element) {
Elements elements = element.select("a");
- List links = new ArrayList(elements.size());
+ List links = new ArrayList<>(elements.size());
for (Element element0 : elements) {
- if (!StringUtil.isBlank(element0.baseUri())) {
+ if (StringUtils.isNotBlank(element0.baseUri())) {
links.add(element0.attr("abs:href"));
} else {
links.add(element0.attr("href"));
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisPriorityScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisPriorityScheduler.java
index 540574a..46d47e5 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisPriorityScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisPriorityScheduler.java
@@ -1,22 +1,23 @@
package us.codecraft.webmagic.scheduler;
-import com.alibaba.fastjson.JSON;
+import java.util.Set;
+
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
+
+import com.alibaba.fastjson.JSON;
+
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
-import java.util.Set;
-
/**
* the redis scheduler with priority
* @author sai
* Created by sai on 16-5-27.
*/
-public class RedisPriorityScheduler extends RedisScheduler
-{
+public class RedisPriorityScheduler extends RedisScheduler {
private static final String ZSET_PREFIX = "zset_";
@@ -37,62 +38,44 @@ public class RedisPriorityScheduler extends RedisScheduler
}
@Override
- protected void pushWhenNoDuplicate(Request request, Task task)
- {
- Jedis jedis = pool.getResource();
- try
- {
- if(request.getPriority() > 0)
+ protected void pushWhenNoDuplicate(Request request, Task task) {
+ try (Jedis jedis = pool.getResource()) {
+ if (request.getPriority() > 0) {
jedis.zadd(getZsetPlusPriorityKey(task), request.getPriority(), request.getUrl());
- else if(request.getPriority() < 0)
+ } else if (request.getPriority() < 0) {
jedis.zadd(getZsetMinusPriorityKey(task), request.getPriority(), request.getUrl());
- else
+ } else {
jedis.lpush(getQueueNoPriorityKey(task), request.getUrl());
+ }
setExtrasInItem(jedis, request, task);
}
- finally
- {
- pool.returnResource(jedis);
- }
}
@Override
- public synchronized Request poll(Task task)
- {
- Jedis jedis = pool.getResource();
- try
- {
+ public synchronized Request poll(Task task) {
+ try (Jedis jedis = pool.getResource()) {
String url = getRequest(jedis, task);
- if(StringUtils.isBlank(url))
+ if (StringUtils.isBlank(url)) {
return null;
+ }
return getExtrasInItem(jedis, url, task);
}
- finally
- {
- pool.returnResource(jedis);
- }
}
- private String getRequest(Jedis jedis, Task task)
- {
+ private String getRequest(Jedis jedis, Task task) {
String url;
Set urls = jedis.zrevrange(getZsetPlusPriorityKey(task), 0, 0);
- if(urls.isEmpty())
- {
+ if (urls.isEmpty()) {
url = jedis.lpop(getQueueNoPriorityKey(task));
- if(StringUtils.isBlank(url))
- {
+ if (StringUtils.isBlank(url)) {
urls = jedis.zrevrange(getZsetMinusPriorityKey(task), 0, 0);
- if(!urls.isEmpty())
- {
+ if (!urls.isEmpty()) {
url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetMinusPriorityKey(task), url);
}
}
- }
- else
- {
+ } else {
url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetPlusPriorityKey(task), url);
}
@@ -100,51 +83,39 @@ public class RedisPriorityScheduler extends RedisScheduler
}
@Override
- public void resetDuplicateCheck(Task task)
- {
- Jedis jedis = pool.getResource();
- try
- {
+ public void resetDuplicateCheck(Task task) {
+ try (Jedis jedis = pool.getResource()) {
jedis.del(getSetKey(task));
}
- finally
- {
- pool.returnResource(jedis);
- }
}
- private String getZsetPlusPriorityKey(Task task)
- {
+ private String getZsetPlusPriorityKey(Task task) {
return ZSET_PREFIX + task.getUUID() + PLUS_PRIORITY_SUFFIX;
}
- private String getQueueNoPriorityKey(Task task)
- {
+ private String getQueueNoPriorityKey(Task task) {
return QUEUE_PREFIX + task.getUUID() + NO_PRIORITY_SUFFIX;
}
- private String getZsetMinusPriorityKey(Task task)
- {
+ private String getZsetMinusPriorityKey(Task task) {
return ZSET_PREFIX + task.getUUID() + MINUS_PRIORITY_SUFFIX;
}
- private void setExtrasInItem(Jedis jedis,Request request, Task task)
- {
- if(request.getExtras() != null)
- {
- String field = DigestUtils.shaHex(request.getUrl());
+ private void setExtrasInItem(Jedis jedis,Request request, Task task) {
+ if (request.getExtras() != null) {
+ String field = DigestUtils.sha1Hex(request.getUrl());
String value = JSON.toJSONString(request);
jedis.hset(getItemKey(task), field, value);
}
}
- private Request getExtrasInItem(Jedis jedis, String url, Task task)
- {
+ private Request getExtrasInItem(Jedis jedis, String url, Task task) {
String key = getItemKey(task);
- String field = DigestUtils.shaHex(url);
+ String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
- if(bytes != null)
+ if (bytes != null) {
return JSON.parseObject(new String(bytes), Request.class);
+ }
return new Request(url);
}
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
index c70d885..19e8313 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -1,8 +1,10 @@
package us.codecraft.webmagic.scheduler;
-import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
+
+import com.alibaba.fastjson.JSON;
+
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
@@ -37,21 +39,15 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public void resetDuplicateCheck(Task task) {
- Jedis jedis = pool.getResource();
- try {
+ try (Jedis jedis = pool.getResource()) {
jedis.del(getSetKey(task));
- } finally {
- pool.returnResource(jedis);
}
}
@Override
public boolean isDuplicate(Request request, Task task) {
- Jedis jedis = pool.getResource();
- try {
+ try (Jedis jedis = pool.getResource()) {
return jedis.sadd(getSetKey(task), request.getUrl()) == 0;
- } finally {
- pool.returnResource(jedis);
}
}
@@ -62,7 +58,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
try {
jedis.rpush(getQueueKey(task), request.getUrl());
if (checkForAdditionalInfo(request)) {
- String field = DigestUtils.shaHex(request.getUrl());
+ String field = DigestUtils.sha1Hex(request.getUrl());
String value = JSON.toJSONString(request);
jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
}
@@ -100,14 +96,13 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public synchronized Request poll(Task task) {
- Jedis jedis = pool.getResource();
- try {
+ try (Jedis jedis = pool.getResource()) {
String url = jedis.lpop(getQueueKey(task));
if (url == null) {
return null;
}
String key = ITEM_PREFIX + task.getUUID();
- String field = DigestUtils.shaHex(url);
+ String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
if (bytes != null) {
Request o = JSON.parseObject(new String(bytes), Request.class);
@@ -115,8 +110,6 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
}
Request request = new Request(url);
return request;
- } finally {
- pool.returnResource(jedis);
}
}
@@ -134,23 +127,17 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public int getLeftRequestsCount(Task task) {
- Jedis jedis = pool.getResource();
- try {
+ try (Jedis jedis = pool.getResource()) {
Long size = jedis.llen(getQueueKey(task));
return size.intValue();
- } finally {
- pool.returnResource(jedis);
}
}
@Override
public int getTotalRequestsCount(Task task) {
- Jedis jedis = pool.getResource();
- try {
+ try (Jedis jedis = pool.getResource()) {
Long size = jedis.scard(getSetKey(task));
return size.intValue();
- } finally {
- pool.returnResource(jedis);
}
}
}