update author
parent
b393e38320
commit
cfb8990453
|
@ -4,7 +4,7 @@ import org.apache.commons.lang3.builder.ToStringBuilder;
|
|||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午3:41 <br>
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午5:29 <br>
|
||||
*/
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午5:29 <br>
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import us.codecraft.webmagic.Site;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-1 <br>
|
||||
* Time: 下午10:18 <br>
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import org.junit.Test;
|
|||
import us.codecraft.webmagic.Site;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-1 <br>
|
||||
* Time: 下午8:42 <br>
|
||||
*/
|
||||
|
|
|
@ -15,7 +15,7 @@ import javax.xml.xpath.XPathExpression;
|
|||
import javax.xml.xpath.XPathFactoryConfigurationException;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-2 <br>
|
||||
* Time: 下午5:48 <br>
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ import us.codecraft.webmagic.schedular.Scheduler;
|
|||
/**
|
||||
* 使用redis管理url,构建一个分布式的爬虫。<br>
|
||||
*
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-25 <br>
|
||||
* Time: 上午7:07 <br>
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-25 <br>
|
||||
* Time: 上午7:51 <br>
|
||||
*/
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
|||
* 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br>
|
||||
* 需要下载Selenium driver支持。<br>
|
||||
*
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午1:37 <br>
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import java.util.concurrent.LinkedBlockingDeque;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午1:41 <br>
|
||||
*/
|
||||
|
|
|
@ -13,7 +13,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午12:27 <br>
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.Task;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午2:46 <br>
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import org.junit.Test;
|
|||
import org.openqa.selenium.WebDriver;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午2:12 <br>
|
||||
*/
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package us.codecraft.webmagic.oo.samples;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-2 <br>
|
||||
* Time: 上午8:10 <br>
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import us.codecraft.webmagic.oo.OOSpider;
|
|||
import us.codecraft.webmagic.oo.TargetUrl;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-2 <br>
|
||||
* Time: 上午7:52 <br>
|
||||
*/
|
||||
|
|
|
@ -5,7 +5,7 @@ import us.codecraft.webmagic.Site;
|
|||
import us.codecraft.webmagic.oo.*;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-3 <br>
|
||||
* Time: 下午8:25 <br>
|
||||
*/
|
||||
|
@ -17,7 +17,7 @@ public class OschinaAnswer implements AfterExtractor{
|
|||
@ExtractBy("//img/@title")
|
||||
private String user;
|
||||
|
||||
@ExtractBy(value="//div[@class='detail']",notNull = false)
|
||||
@ExtractBy("//div[@class='detail']")
|
||||
private String content;
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
@ -6,7 +6,7 @@ import us.codecraft.webmagic.oo.OOSpider;
|
|||
import us.codecraft.webmagic.oo.TargetUrl;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-8-2 <br>
|
||||
* Time: 上午7:52 <br>
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ import us.codecraft.webmagic.scheduler.RedisScheduler;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Author yihua.huang@dianping.com
|
||||
* Author code4crafter@gmail.com
|
||||
* Date: 13-6-24
|
||||
* Time: 下午2:12
|
||||
*/
|
||||
|
|
|
@ -6,7 +6,7 @@ import us.codecraft.webmagic.processor.SimplePageProcessor;
|
|||
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-14 <br>
|
||||
* Time: 上午8:33 <br>
|
||||
*/
|
||||
|
|
|
@ -11,7 +11,7 @@ import us.codecraft.webmagic.selenium.downloader.SeleniumDownloader;
|
|||
/**
|
||||
* 花瓣网抽取器。<br>
|
||||
* 使用Selenium做页面动态渲染。<br>
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午4:08 <br>
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,7 @@ import us.codecraft.webmagic.pipeline.FilePipeline;
|
|||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 上午7:31 <br>
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue