[BugFix] Only one URL from sourceRegion can be extracted #107
parent
08fa3b01c1
commit
b06aa489fb
|
@ -122,6 +122,16 @@ public class PlainText implements Selectable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Extracts by custom selector: forwards {@code selector} together with
 * {@code strings} to the two-argument {@code select} method and returns
 * whatever that call produces.
 *
 * @param selector the custom selector to apply
 * @return the {@link Selectable} returned by {@code select(selector, strings)}
 */
@Override
|
||||
public Selectable select(Selector selector) {
|
||||
return select(selector, strings);
|
||||
}
|
||||
|
||||
/**
 * Extracts by custom selector: forwards {@code selector} together with
 * {@code strings} to the two-argument {@code selectList} method and returns
 * whatever that call produces.
 *
 * @param selector the custom selector to apply
 * @return the {@link Selectable} returned by {@code selectList(selector, strings)}
 */
@Override
|
||||
public Selectable selectList(Selector selector) {
|
||||
return selectList(selector, strings);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return get();
|
||||
|
|
|
@ -128,4 +128,19 @@ public interface Selectable {
|
|||
*/
|
||||
public Selectable jsonPath(String jsonPath);
|
||||
|
||||
/**
|
||||
* Extracts content from this object using a custom selector.
|
||||
*
|
||||
* @param selector the custom selector to apply
|
||||
* @return the result of the selection, as a {@link Selectable}
|
||||
*/
|
||||
public Selectable select(Selector selector);
|
||||
|
||||
/**
|
||||
* Extracts content from this object using a custom selector, list variant.
|
||||
* NOTE(review): presumably yields every match of the selector rather than
|
||||
* a single one; confirm against the implementations.
|
||||
*
|
||||
* @param selector the custom selector to apply
|
||||
* @return the result of the selection, as a {@link Selectable}
|
||||
*/
|
||||
public Selectable selectList(Selector selector);
|
||||
}
|
||||
|
|
|
@ -7,9 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
|||
import us.codecraft.webmagic.selector.Selector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -66,7 +64,7 @@ class ModelPageProcessor implements PageProcessor {
|
|||
if (urlRegionSelector == null) {
|
||||
links = page.getHtml().links().all();
|
||||
} else {
|
||||
links = urlRegionSelector.selectList(page.getHtml().toString());
|
||||
links = page.getHtml().selectList(urlRegionSelector).links().all();
|
||||
}
|
||||
for (String link : links) {
|
||||
for (Pattern targetUrlPattern : urlPatterns) {
|
||||
|
|
Loading…
Reference in New Issue