release notes and docs

master
yihua.huang 2013-08-11 10:21:26 +08:00
parent 1f86ce7720
commit 787b952932
3 changed files with 31 additions and 5 deletions

View File

@ -28,13 +28,15 @@ Release Notes
}
增加一个Spider.test(url)方法,用于开发爬虫时进行调试。
增加基于redis的分布式支持。
增加XPath2.0语法支持(webmagic-saxon模块)。
增加基于Selenium的浏览器渲染支持,用于抓取动态加载内容(webmagic-selenium模块)。
修复一些已有bug。
修复了不支持https的bug。
补充了文档:[webmagic-0.2.0用户手册](http://code4craft.github.io/webmagic/)。

View File

@ -220,9 +220,17 @@ public class Spider implements Runnable, Task {
}
}
public void test(String url){
/**
 * Runs the given URLs through the spider directly, intended for debugging
 * while developing a crawler. After {@code checkComponent()}, each URL is
 * wrapped in a {@code Request} and handed to {@code processRequest}.
 *
 * @param urls the URLs to process; the loop is skipped when the array is empty
 */
public void test(String... urls){
checkComponent();
// NOTE(review): `url` is not declared at this point (the loop variable below is
// the only `url` in this method) -- this call looks like a leftover from the
// earlier single-URL version of test(); confirm before relying on it.
processRequest(new Request(url));
if (urls.length>0){
for (String url : urls) {
processRequest(new Request(url));
}
}
}
private void processRequest(Request request) {

View File

@ -8,6 +8,7 @@ import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.util.List;
@ -32,12 +33,19 @@ public class GithubRepo implements HasKey {
// Extracted with multi = true into a list of strings.
@ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']",multi = true)
private List<String> language;
// NOTE(review): star and fork are annotated with the identical XPath expression,
// so both fields are presumably populated from the same match -- verify that the
// selectors actually distinguish the two counters.
@ExtractBy("//a[@class='social-count js-social-count']/text()")
private String star;
@ExtractBy("//a[@class='social-count js-social-count']/text()")
private String fork;
// Annotated with @ExtractByUrl instead of an XPath; presumably filled from the
// page URL -- confirm against the annotation's documentation.
@ExtractByUrl
private String url;
/**
 * Example entry point: builds an OOSpider for GithubRepo starting at
 * https://github.com/explore, writing results through a
 * JsonFilePageModelPipeline, and runs it with 15 threads.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
// First crawl: sleep time 0, no retry setting, no explicit scheduler.
// NOTE(review): this statement and the next one configure the same crawl; this
// one looks like the pre-change version shown next to its replacement --
// confirm which of the two is intended to remain.
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0),
new JsonFilePageModelPipeline(), GithubRepo.class).thread(15).run();
// Second crawl: same start URL and sleep time, retry times set to 3, and a
// FileCacheQueueScheduler constructed with "/data/webmagic/cache/".
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0).setRetryTimes(3),
new JsonFilePageModelPipeline(), GithubRepo.class)
.scheduler(new FileCacheQueueScheduler("/data/webmagic/cache/")).thread(15).run();
}
@Override
@ -64,4 +72,12 @@ public class GithubRepo implements HasKey {
/**
 * Returns the value of the {@code url} field.
 *
 * @return the current {@code url} value
 */
public String getUrl() {
    return this.url;
}
/**
 * Returns the value of the {@code star} field.
 *
 * @return the current {@code star} value
 */
public String getStar() {
    return this.star;
}
/**
 * Returns the value of the {@code fork} field.
 *
 * @return the current {@code fork} value
 */
public String getFork() {
    return this.fork;
}
}