[BugFix] Only one URL from sourceRegion can be extracted #107
parent
08fa3b01c1
commit
b06aa489fb
|
@ -122,6 +122,16 @@ public class PlainText implements Selectable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Selectable select(Selector selector) {
|
||||||
|
return select(selector, strings);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Selectable selectList(Selector selector) {
|
||||||
|
return selectList(selector, strings);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return get();
|
return get();
|
||||||
|
|
|
@ -128,4 +128,19 @@ public interface Selectable {
|
||||||
*/
|
*/
|
||||||
public Selectable jsonPath(String jsonPath);
|
public Selectable jsonPath(String jsonPath);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* extract by custom selector
|
||||||
|
*
|
||||||
|
* @param selector
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Selectable select(Selector selector);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* extract all matching results by custom selector
|
||||||
|
*
|
||||||
|
* @param selector
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Selectable selectList(Selector selector);
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,9 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.webmagic.selector.Selector;
|
import us.codecraft.webmagic.selector.Selector;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@ -66,7 +64,7 @@ class ModelPageProcessor implements PageProcessor {
|
||||||
if (urlRegionSelector == null) {
|
if (urlRegionSelector == null) {
|
||||||
links = page.getHtml().links().all();
|
links = page.getHtml().links().all();
|
||||||
} else {
|
} else {
|
||||||
links = urlRegionSelector.selectList(page.getHtml().toString());
|
links = page.getHtml().selectList(urlRegionSelector).links().all();
|
||||||
}
|
}
|
||||||
for (String link : links) {
|
for (String link : links) {
|
||||||
for (Pattern targetUrlPattern : urlPatterns) {
|
for (Pattern targetUrlPattern : urlPatterns) {
|
||||||
|
|
Loading…
Reference in New Issue