fix GithubRepoPageProcessor in example
parent
cfed860fb9
commit
dafd2b77ff
|
@@ -11,11 +11,11 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
*/
|
*/
|
||||||
public class GithubRepoPageProcessor implements PageProcessor {
|
public class GithubRepoPageProcessor implements PageProcessor {
|
||||||
|
|
||||||
private Site site = Site.me().setRetryTimes(3).setSleepTime(0);
|
private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process(Page page) {
|
public void process(Page page) {
|
||||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-])").all());
|
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all());
|
||||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-])").all());
|
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-])").all());
|
||||||
page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
||||||
page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString());
|
page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString());
|
||||||
|
|
Loading…
Reference in New Issue