commit
5d9fda0614
|
@ -26,12 +26,12 @@ public class RegexSelector implements Selector {
|
|||
if (StringUtils.isBlank(regexStr)) {
|
||||
throw new IllegalArgumentException("regex must not be empty");
|
||||
}
|
||||
if (!StringUtils.contains(regexStr, "(") && !StringUtils.contains(regexStr, ")")) {
|
||||
// Check bracket for regex group. Add default group 1 if there is no group.
|
||||
// Only check if there exists the valid left parenthesis, leave regexp validation for Pattern.
|
||||
if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") ==
|
||||
StringUtils.countMatches(regexStr, "(?:") - StringUtils.countMatches(regexStr, "\\(?:")) {
|
||||
regexStr = "(" + regexStr + ")";
|
||||
}
|
||||
if (!StringUtils.contains(regexStr, "(") || !StringUtils.contains(regexStr, ")")) {
|
||||
throw new IllegalArgumentException("regex must have capture group 1");
|
||||
}
|
||||
this.regexStr = regexStr;
|
||||
try {
|
||||
regex = Pattern.compile(regexStr, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@ -8,14 +8,18 @@ import org.junit.Test;
|
|||
*/
|
||||
public class RegexSelectorTest {
|
||||
|
||||
@Test
|
||||
public void testInvalidRegex() {
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testRegexWithSingleLeftBracket() {
|
||||
String regex = "\\d+(";
|
||||
try {
|
||||
new RegexSelector(regex);
|
||||
Assert.assertNotNull(regex);
|
||||
} catch (Exception e) {
|
||||
new RegexSelector(regex);
|
||||
}
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testRegexWithLeftBracketQuoted() {
|
||||
String regex = "\\(.+";
|
||||
String source = "(hello world";
|
||||
RegexSelector regexSelector = new RegexSelector(regex);
|
||||
String select = regexSelector.select(source);
|
||||
Assert.assertEquals(source,select);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue