[BugFix] Multiple models in one PageProcessor will be skipped #85
parent
7aaf837e15
commit
dafd0b5875
|
@ -55,11 +55,14 @@ class ModelPageProcessor implements PageProcessor {
|
|||
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
|
||||
Object process = pageModelExtractor.process(page);
|
||||
if (process == null || (process instanceof List && ((List) process).size() == 0)) {
|
||||
page.getResultItems().setSkip(true);
|
||||
continue;
|
||||
}
|
||||
postProcessPageModel(pageModelExtractor.getClazz(), process);
|
||||
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
|
||||
}
|
||||
if (page.getResultItems().getAll().size() == 0) {
|
||||
page.getResultItems().setSkip(true);
|
||||
}
|
||||
}
|
||||
|
||||
private void extractLinks(Page page, Selector urlRegionSelector, List<Pattern> urlPatterns) {
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
package us.codecraft.webmagic.model;
|
||||
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* @date 14-4-4
|
||||
*/
|
||||
public class ModelPageProcessorTest {
|
||||
|
||||
@TargetUrl("http://codecraft.us/foo")
|
||||
public static class ModelFoo {
|
||||
|
||||
@ExtractBy(value = "//div/@foo", notNull = true)
|
||||
private String foo;
|
||||
|
||||
}
|
||||
|
||||
@TargetUrl("http://codecraft.us/bar")
|
||||
public static class ModelBar {
|
||||
|
||||
@ExtractBy(value = "//div/@bar", notNull = true)
|
||||
private String bar;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiModel_should_not_skip_when_match() throws Exception {
|
||||
Page page = new Page();
|
||||
page.setRawText("<div foo='foo'></div>");
|
||||
page.setRequest(new Request("http://codecraft.us/foo"));
|
||||
page.setUrl(PlainText.create("http://codecraft.us/foo"));
|
||||
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
|
||||
modelPageProcessor.process(page);
|
||||
assertThat(page.getResultItems().isSkip()).isFalse();
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue