change author info
parent
672db58db7
commit
8cef8774cb
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午11:22
|
||||
*/
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
package us.codecraft.webmagic;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Request对象是
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午11:37
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午12:13
|
||||
*/
|
||||
|
|
|
@ -14,7 +14,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午6:53
|
||||
*/
|
||||
|
|
|
@ -5,11 +5,18 @@ import us.codecraft.webmagic.Request;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Downloader is the core interface webmagic uses to fetch pages.
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 12:14 PM
|
||||
*/
|
||||
public interface Downloader {
|
||||
|
||||
/**
|
||||
* Downloads the page for the given request. NOTE(review): summary inferred from the signature; confirm against implementations.
|
||||
|
||||
* @param request the request to download (presumably carries the target URL; TODO confirm)
|
||||
|
||||
* @param site the site supplied alongside the request (presumably its crawl settings; TODO confirm)
|
||||
|
||||
* @return the resulting {@code Page}; NOTE(review): behavior on failure is not visible here
|
||||
|
||||
*/
|
||||
public Page download(Request request,Site site);
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
|||
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午12:15
|
||||
*/
|
||||
|
|
|
@ -13,7 +13,7 @@ import org.apache.http.params.*;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午12:29
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.selector.Selectable;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:45
|
||||
*/
|
||||
|
|
|
@ -13,7 +13,7 @@ import java.io.PrintWriter;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午6:28
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:39
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午11:42
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-22
|
||||
* Time: 下午9:15
|
||||
*/
|
||||
|
|
|
@ -16,7 +16,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:13
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.concurrent.BlockingQueue;
|
|||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:13
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import us.codecraft.webmagic.Request;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:12
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:54
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:54
|
||||
*/
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:39
|
||||
*/
|
||||
|
|
|
@ -9,7 +9,7 @@ import java.util.regex.Pattern;
|
|||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:09
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import java.util.regex.Pattern;
|
|||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:09
|
||||
*/
|
||||
|
|
|
@ -3,7 +3,7 @@ package us.codecraft.webmagic.selector;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-20
|
||||
* Time: 下午7:51
|
||||
*/
|
||||
|
|
|
@ -3,7 +3,7 @@ package us.codecraft.webmagic.selector;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-20
|
||||
* Time: 下午8:02
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import java.util.Map;
|
|||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:56
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
/**
|
||||
* readability算法,基础是找到所有p标签的父节点
|
||||
* 写的比较乱,最终效果还在尝试中
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午4:42
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午9:39
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:52
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import org.junit.Test;
|
|||
import us.codecraft.webmagic.selector.Html;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午8:42
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import junit.framework.Assert;
|
|||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 上午7:13
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import org.junit.Assert;
|
|||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* User: cairne Date: 13-4-21 Time: 上午10:06
|
||||
* Author: code4crafter@gmail.com Date: 13-4-21 Time: 上午10:06
|
||||
*/
|
||||
public class XpathSelectorTest {
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import org.junit.Assert;
|
|||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午2:22
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import us.codecraft.webmagic.utils.UrlUtils;
|
|||
import java.io.*;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-6-8
|
||||
* Time: 下午9:00
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-6-9
|
||||
* Time: 上午7:14
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@ import us.codecraft.webmagic.selector.PlainText;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:48
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-5-20
|
||||
* Time: 下午5:31
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-5-20
|
||||
* Time: 下午5:31
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:48
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:48
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午8:08
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import us.codecraft.webmagic.Page;
|
|||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:48
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-21
|
||||
* Time: 下午1:48
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@ import us.codecraft.webmagic.samples.HuxiuProcessor;
|
|||
import us.codecraft.webmagic.schedular.FileCacheQueueSchedular;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-4-20
|
||||
* Time: 下午7:46
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import us.codecraft.webmagic.schedular.FileCacheQueueSchedular;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-6-9
|
||||
* Time: 上午8:02
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import us.codecraft.webmagic.schedular.FileCacheQueueSchedular;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-6-9
|
||||
* Time: 上午8:02
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import us.codecraft.webmagic.schedular.FileCacheQueueSchedular;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* User: cairne
|
||||
* Author: code4crafter@gmail.com
|
||||
* Date: 13-6-9
|
||||
* Time: 上午8:02
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue