diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/main/QuickStarter.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/main/QuickStarter.java index 074dd0f..61083d6 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/main/QuickStarter.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/main/QuickStarter.java @@ -38,7 +38,7 @@ public class QuickStarter { key = readKey(key); System.out.println("The demo started and will last 20 seconds..."); //Start spider - OOSpider.create(Site.me().addStartUrl(urlMap.get(key)), clazzMap.get(key)).pipeline(new MultiPagePipeline()).pipeline(new ConsolePipeline()).runAsync(); + OOSpider.create(Site.me(), clazzMap.get(key)).addUrl(urlMap.get(key)).addPipeline(new MultiPagePipeline()).addPipeline(new ConsolePipeline()).runAsync(); try { Thread.sleep(20000); @@ -57,7 +57,7 @@ public class QuickStarter { System.out.println(classEntry.getKey()+"\t" + classEntry.getValue() + "\t" + urlMap.get(classEntry.getKey())); } while (key == null) { - key = new String(stdin.nextLine()); + key = stdin.nextLine(); if (clazzMap.get(key) == null) { System.out.println("Invalid choice!"); key = null; diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GithubRepoPageProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GithubRepoPageProcessor.java index 0de61fb..3f4e190 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GithubRepoPageProcessor.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GithubRepoPageProcessor.java @@ -19,7 +19,7 @@ public class GithubRepoPageProcessor implements PageProcessor { page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all()); GithubRepo githubRepo = new GithubRepo(); githubRepo.setAuthor(page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); - githubRepo.setName(page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); + githubRepo.setName(page.getHtml().xpath("//h1[contains(@class, 'entry-title') and contains(@class, 'public')]/strong/a/text()").toString()); githubRepo.setReadme(page.getHtml().xpath("//div[@id='readme']/tidyText()").toString()); if (githubRepo.getName() == null) { //skip this page diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java index dbfa815..2fd690d 100644 --- a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java +++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java @@ -18,7 +18,7 @@ public class SpiderTest { @Ignore @Test public void testSpider() throws InterruptedException { - Spider me = Spider.create(new HuxiuProcessor()).pipeline(new FilePipeline()); + Spider me = Spider.create(new HuxiuProcessor()).addPipeline(new FilePipeline()); me.run(); } @@ -31,7 +31,7 @@ public class SpiderTest { SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html"); System.out.println(pageProcessor2.getSite().getCharset()); pageProcessor2.getSite().setSleepTime(500); - Spider.create(pageProcessor2).pipeline(new FilePipeline()).scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")). + Spider.create(pageProcessor2).addPipeline(new FilePipeline()).scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")). run();