diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 7fd2d09..d60ab5c 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -10,7 +10,8 @@ import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.RequestBuilder; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; @@ -34,7 +35,7 @@ import java.util.Set; @ThreadSafe public class HttpClientDownloader implements Downloader { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); private final Map httpClients = new HashMap(); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java index 04709f2..8eab426 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java @@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline; import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.annotation.ThreadSafe; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.utils.FilePersistentBase; @@ -21,7 +22,7 @@ import java.util.Map; @ThreadSafe public class FilePipeline extends FilePersistentBase implements Pipeline { - private Logger 
logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); /** * create a FilePipeline with default path"/data/webmagic/" diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java index fa951e1..9a3fda7 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java @@ -1,7 +1,8 @@ package us.codecraft.webmagic.scheduler; import org.apache.http.annotation.ThreadSafe; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.utils.NumberUtils; @@ -24,7 +25,7 @@ public class PriorityScheduler implements Scheduler { public static final int INITIAL_CAPACITY = 5; - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); private BlockingQueue noPriorityQueue = new LinkedBlockingQueue(); @@ -46,9 +47,7 @@ public class PriorityScheduler implements Scheduler { @Override public synchronized void push(Request request, Task task) { - if (logger.isDebugEnabled()) { - logger.debug("push to queue " + request.getUrl()); - } + logger.debug("push to queue {}", request.getUrl()); if (urls.add(request.getUrl())) { if (request.getPriority() == 0) { noPriorityQueue.add(request); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java index b263f91..ebab857 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java @@ -1,7 +1,8 @@ package us.codecraft.webmagic.scheduler; import 
org.apache.http.annotation.ThreadSafe; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; @@ -10,6 +11,7 @@ import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; + /** * Basic Scheduler implementation.
* Store urls to fetch in LinkedBlockingQueue and remove duplicate urls by HashMap. @@ -20,7 +22,7 @@ import java.util.concurrent.LinkedBlockingQueue; @ThreadSafe public class QueueScheduler implements Scheduler { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); private BlockingQueue queue = new LinkedBlockingQueue(); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java index 1dce782..3f5df76 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java @@ -1,8 +1,9 @@ package us.codecraft.webmagic.selector; -import org.apache.log4j.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.utils.EnvironmentUtil; import java.util.ArrayList; @@ -16,7 +17,7 @@ import java.util.List; */ public class Html extends PlainText { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); /** * Store parsed document for better performance when only one text exist. diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/PropertyLoader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/PropertyLoader.java index 28521b8..bffbcf2 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/PropertyLoader.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/PropertyLoader.java @@ -5,11 +5,14 @@ import us.codecraft.webmagic.processor.PageProcessor; import java.util.Map; /** + * Inject property to object by {@link Inject} annotation. 
+ * * @author yihua.huang@dianping.com */ -public interface PropertyLoader { +public class PropertyLoader { - PropertyLoader clazz(Class clazz); + public T load(T object, Map properties) { + return object; + } - T load(Map properties); } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java index 154667c..3c7e6ff 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java @@ -2,7 +2,8 @@ package us.codecraft.webmagic.downloader; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.*; import us.codecraft.webmagic.utils.Experimental; import us.codecraft.webmagic.pipeline.Pipeline; @@ -28,7 +29,7 @@ public class FileCache extends FilePersistentBase implements Downloader, Pipelin private final PageProcessor pageProcessor; - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); public FileCache(String startUrl, String urlPattern) { this(startUrl, urlPattern, "/data/webmagic/temp/"); diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoApi.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoApi.java index deacde7..34608fd 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoApi.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoApi.java @@ -27,7 +27,7 @@ public class GithubRepoApi implements HasKey { @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.stargazers_count") private int star; - @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.forks_count") + @ExtractBy(type = 
ExtractBy.Type.JsonPath, value = "$.homepage") private int fork; @ExtractByUrl diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java index 62b6de0..5e4da11 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java @@ -1,7 +1,8 @@ package us.codecraft.webmagic.model; import org.apache.commons.lang3.StringUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.model.annotation.*; import us.codecraft.webmagic.model.formatter.BasicTypeFormatter; @@ -40,7 +41,7 @@ class PageModelExtractor { private Extractor objectExtractor; - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); public static PageModelExtractor create(Class clazz) { PageModelExtractor pageModelExtractor = new PageModelExtractor(); diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java index 273b18b..c4826e2 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java @@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.model.HasKey; import us.codecraft.webmagic.utils.FilePersistentBase; @@ -21,7 +22,7 @@ import java.io.PrintWriter; */ 
public class FilePageModelPipeline extends FilePersistentBase implements PageModelPipeline { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); /** * new JsonFilePageModelPipeline with default path "/data/webmagic/" diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java index 4e35dfe..1583b0c 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java @@ -3,7 +3,8 @@ package us.codecraft.webmagic.pipeline; import com.alibaba.fastjson.JSON; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.model.HasKey; import us.codecraft.webmagic.utils.FilePersistentBase; @@ -22,7 +23,7 @@ import java.io.PrintWriter; */ public class JsonFilePageModelPipeline extends FilePersistentBase implements PageModelPipeline { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); /** * new JsonFilePageModelPipeline with default path "/data/webmagic/" diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java index 625313f..b6c55af 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java @@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline; import com.alibaba.fastjson.JSON; import 
org.apache.commons.codec.digest.DigestUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.utils.FilePersistentBase; @@ -20,7 +21,7 @@ import java.io.PrintWriter; */ public class JsonFilePipeline extends FilePersistentBase implements Pipeline { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); /** * new JsonFilePageModelPipeline with default path "/data/webmagic/" diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index 3aa23c7..38e8a79 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -2,7 +2,8 @@ package us.codecraft.webmagic.scheduler; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; -import org.apache.log4j.Logger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; @@ -24,7 +25,7 @@ import java.util.concurrent.atomic.AtomicInteger; */ public class FileCacheQueueScheduler implements Scheduler { - private Logger logger = Logger.getLogger(getClass()); + private Logger logger = LoggerFactory.getLogger(getClass()); private String filePath = System.getProperty("java.io.tmpdir");