diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java index f4ae058..e475148 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java @@ -15,8 +15,8 @@ public class GithubRepoPageProcessor implements PageProcessor { @Override public void process(Page page) { - page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all()); - page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all()); + page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all()); + page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+)").all()); page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); if (page.getResultItems().get("name")==null){