Remove Site.addStartUrl calls from samples (where a start URL is kept, it is now added on the spider via addUrl)
parent
c13110c4cb
commit
8564d51e56
|
@ -21,7 +21,7 @@ public class DianpingFtlDataScanner implements AfterExtractor {
|
||||||
private List<String> data;
|
private List<String> data;
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me().addStartUrl("http://w.alpha.dp/").setSleepTime(0), DianpingFtlDataScanner.class)
|
OOSpider.create(Site.me().setSleepTime(0), DianpingFtlDataScanner.class)
|
||||||
.thread(5).run();
|
.thread(5).run();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,9 +41,10 @@ public class GithubRepo implements HasKey {
|
||||||
private String url;
|
private String url;
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0).setRetryTimes(3),
|
OOSpider.create(Site.me().setSleepTime(0).setRetryTimes(3),
|
||||||
new JsonFilePageModelPipeline(), GithubRepo.class)
|
new JsonFilePageModelPipeline(), GithubRepo.class)
|
||||||
.scheduler(new FileCacheQueueScheduler("/data/webmagic/cache/")).thread(15).run();
|
.addUrl("https://github.com/explore")
|
||||||
|
.setScheduler(new FileCacheQueueScheduler("/data/webmagic/cache/")).thread(15).run();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class IteyeBlog implements Blog{
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me().addStartUrl("http://flashsword20.iteye.com/blog"), IteyeBlog.class).run();
|
OOSpider.create(Site.me(), IteyeBlog.class).addUrl("http://flashsword20.iteye.com/blog").run();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTitle() {
|
public String getTitle() {
|
||||||
|
|
|
@ -32,12 +32,12 @@ public class Kr36NewsModel {
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException, JMException {
|
public static void main(String[] args) throws IOException, JMException {
|
||||||
//Just for benchmark
|
//Just for benchmark
|
||||||
Spider thread = OOSpider.create(Site.me().addStartUrl("http://www.36kr.com/").setSleepTime(0), new PageModelPipeline() {
|
Spider thread = OOSpider.create(Site.me().setSleepTime(0), new PageModelPipeline() {
|
||||||
@Override
|
@Override
|
||||||
public void process(Object o, Task task) {
|
public void process(Object o, Task task) {
|
||||||
|
|
||||||
}
|
}
|
||||||
}, Kr36NewsModel.class).thread(20);
|
}, Kr36NewsModel.class).thread(20).addUrl("http://www.36kr.com/");
|
||||||
thread.start();
|
thread.start();
|
||||||
SpiderMonitor spiderMonitor = SpiderMonitor.instance();
|
SpiderMonitor spiderMonitor = SpiderMonitor.instance();
|
||||||
spiderMonitor.register(thread);
|
spiderMonitor.register(thread);
|
||||||
|
|
|
@ -22,7 +22,7 @@ public class OschinaAnswer implements AfterExtractor{
|
||||||
private String content;
|
private String content;
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me().addStartUrl("http://www.oschina.net/question/567527_120597"), OschinaAnswer.class).run();
|
OOSpider.create(Site.me(), OschinaAnswer.class).addUrl("http://www.oschina.net/question/567527_120597").run();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -26,7 +26,7 @@ public class OschinaBlog{
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
OOSpider.create(Site.me()
|
OOSpider.create(Site.me()
|
||||||
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36").addStartUrl("http://my.oschina.net/flashsword/blog")
|
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36")
|
||||||
.setSleepTime(0)
|
.setSleepTime(0)
|
||||||
.setRetryTimes(3)
|
.setRetryTimes(3)
|
||||||
,new PageModelPipeline() {
|
,new PageModelPipeline() {
|
||||||
|
@ -34,7 +34,7 @@ public class OschinaBlog{
|
||||||
public void process(Object o, Task task) {
|
public void process(Object o, Task task) {
|
||||||
|
|
||||||
}
|
}
|
||||||
}, OschinaBlog.class).thread(10).run();
|
}, OschinaBlog.class).thread(10).addUrl("http://my.oschina.net/flashsword/blog").run();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTitle() {
|
public String getTitle() {
|
||||||
|
|
Loading…
Reference in New Issue