fix URL regex in GithubRepoPageProcessor #305
parent
047cb8ff8f
commit
ca072c5575
|
@ -15,8 +15,8 @@ public class GithubRepoPageProcessor implements PageProcessor {
|
|||
|
||||
@Override
|
||||
public void process(Page page) {
|
||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
|
||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all());
|
||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-])").all());
|
||||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-])").all());
|
||||
page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
||||
page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString());
|
||||
if (page.getResultItems().get("name")==null){
|
||||
|
|
Loading…
Reference in New Issue