commit
84b046e4c9
|
@ -38,7 +38,7 @@ public class QuickStarter {
|
|||
key = readKey(key);
|
||||
System.out.println("The demo started and will last 20 seconds...");
|
||||
//Start spider
|
||||
OOSpider.create(Site.me().addStartUrl(urlMap.get(key)), clazzMap.get(key)).pipeline(new MultiPagePipeline()).pipeline(new ConsolePipeline()).runAsync();
|
||||
OOSpider.create(Site.me(), clazzMap.get(key)).addUrl(urlMap.get(key)).addPipeline(new MultiPagePipeline()).addPipeline(new ConsolePipeline()).runAsync();
|
||||
|
||||
try {
|
||||
Thread.sleep(20000);
|
||||
|
@ -57,7 +57,7 @@ public class QuickStarter {
|
|||
System.out.println(classEntry.getKey()+"\t" + classEntry.getValue() + "\t" + urlMap.get(classEntry.getKey()));
|
||||
}
|
||||
while (key == null) {
|
||||
key = new String(stdin.nextLine());
|
||||
key = stdin.nextLine();
|
||||
if (clazzMap.get(key) == null) {
|
||||
System.out.println("Invalid choice!");
|
||||
key = null;
|
||||
|
|
|
@ -19,7 +19,7 @@ public class GithubRepoPageProcessor implements PageProcessor {
|
|||
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all());
|
||||
GithubRepo githubRepo = new GithubRepo();
|
||||
githubRepo.setAuthor(page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
||||
githubRepo.setName(page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString());
|
||||
githubRepo.setName(page.getHtml().xpath("//h1[contains(@class, 'entry-title') and contains(@class, 'public')]/strong/a/text()").toString());
|
||||
githubRepo.setReadme(page.getHtml().xpath("//div[@id='readme']/tidyText()").toString());
|
||||
if (githubRepo.getName() == null) {
|
||||
//skip this page
|
||||
|
|
|
@ -18,7 +18,7 @@ public class SpiderTest {
|
|||
@Ignore
|
||||
@Test
|
||||
public void testSpider() throws InterruptedException {
|
||||
Spider me = Spider.create(new HuxiuProcessor()).pipeline(new FilePipeline());
|
||||
Spider me = Spider.create(new HuxiuProcessor()).addPipeline(new FilePipeline());
|
||||
me.run();
|
||||
}
|
||||
|
||||
|
@ -31,7 +31,7 @@ public class SpiderTest {
|
|||
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
||||
System.out.println(pageProcessor2.getSite().getCharset());
|
||||
pageProcessor2.getSite().setSleepTime(500);
|
||||
Spider.create(pageProcessor2).pipeline(new FilePipeline()).scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).
|
||||
Spider.create(pageProcessor2).addPipeline(new FilePipeline()).scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).
|
||||
run();
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue