diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java index 6bfe88d..1d9bf25 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java @@ -23,6 +23,8 @@ class ModelPageProcessor implements PageProcessor { private Site site; + private boolean extractLinks = true; + public static ModelPageProcessor create(Site site, Class... clazzs) { ModelPageProcessor modelPageProcessor = new ModelPageProcessor(site); for (Class clazz : clazzs) { @@ -45,8 +47,10 @@ class ModelPageProcessor implements PageProcessor { @Override public void process(Page page) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) { - extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns()); - extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns()); + if (extractLinks) { + extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns()); + extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns()); + } Object process = pageModelExtractor.process(page); if (process == null || (process instanceof List && ((List) process).size() == 0)) { continue; @@ -83,4 +87,12 @@ class ModelPageProcessor implements PageProcessor { public Site getSite() { return site; } + + public boolean isExtractLinks() { + return extractLinks; + } + + public void setExtractLinks(boolean extractLinks) { + this.extractLinks = extractLinks; + } } diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java index 08dc64a..eaabcca 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java @@ -97,4 +97,9 @@ public class OOSpider extends Spider { return this; } + public OOSpider setIsExtractLinks(boolean isExtractLinks){ + modelPageProcessor.setExtractLinks(isExtractLinks); + return this; + } + }