add test
parent
ade5714d50
commit
8b15f3c63d
|
@ -220,6 +220,11 @@ public class Spider implements Runnable, Task {
|
|||
}
|
||||
}
|
||||
|
||||
public void test(String url){
|
||||
checkComponent();
|
||||
processRequest(new Request(url));
|
||||
}
|
||||
|
||||
private void processRequest(Request request) {
|
||||
Page page = downloader.download(request, this);
|
||||
if (page == null) {
|
||||
|
|
|
@ -1,23 +1,67 @@
|
|||
package us.codecraft.webmagic.model.samples;
|
||||
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
|
||||
import us.codecraft.webmagic.model.HasKey;
|
||||
import us.codecraft.webmagic.model.OOSpider;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
|
||||
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* Date: 13-8-10 <br>
|
||||
* Time: 6:37 PM <br>
|
||||
*/
|
||||
@TargetUrl("https://github.com/code4craft/*")
|
||||
public class GithubRepo {
|
||||
@TargetUrl("https://github.com/\\w+/\\w+")
|
||||
@HelpUrl({"https://github.com/\\w+\\?tab=repositories","https://github.com/\\w+","https://github.com/explore/*"})
|
||||
public class GithubRepo implements HasKey {
|
||||
|
||||
@ExtractBy("//h1[@class='entry-title']/strong/a/text()")
|
||||
@ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true)
|
||||
private String name;
|
||||
|
||||
@ExtractByUrl("https://github\\.com/(\\w+)/.*")
|
||||
private String author;
|
||||
|
||||
@ExtractBy("//div[@id='readme']")
|
||||
private String readme;
|
||||
|
||||
@ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']",multi = true)
|
||||
private List<String> language;
|
||||
|
||||
@ExtractByUrl
|
||||
private String url;
|
||||
|
||||
/**
 * Sample entry point: builds {@code OOSpider} instances for {@code GithubRepo}
 * and runs them.
 * <p>
 * NOTE(review): this listing comes from a diff view and appears to interleave
 * the removed and the added version of the body; confirm which of the two
 * {@code OOSpider.create(...)} calls below is the current one.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
|
||||
// Spider started at the code4craft profile page, using ConsolePageModelPipeline.
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/"), new ConsolePageModelPipeline(), GithubRepo.class).run();
|
||||
// Spider started at the GitHub explore page with sleep time set to 0,
// using JsonFilePageModelPipeline.
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0),
|
||||
// thread(15): presumably the number of worker threads -- TODO confirm against OOSpider/Spider.
new JsonFilePageModelPipeline(), GithubRepo.class).thread(15).run();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String key() {
|
||||
return author+":"+name;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getReadme() {
|
||||
return readme;
|
||||
}
|
||||
|
||||
public String getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public List<String> getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public String getUrl() {
|
||||
return url;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue