Check for a valid left parenthesis

master
Almark Ming 2013-12-17 16:55:53 +08:00
parent 0c3ff3d6b1
commit 83926970b2
1 changed file with 102 additions and 93 deletions

View File

@ -1,93 +1,102 @@
package us.codecraft.webmagic.selector; package us.codecraft.webmagic.selector;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
/** /**
* Selector in regex.<br> * Selector in regex.<br>
* *
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @since 0.1.0 * @since 0.1.0
*/ */
public class RegexSelector implements Selector { public class RegexSelector implements Selector {
private String regexStr; private String regexStr;
private Pattern regex; private Pattern regex;
private int group = 1; private int group = 1;
public RegexSelector(String regexStr, int group) { public RegexSelector(String regexStr, int group) {
if (StringUtils.isBlank(regexStr)) { if (StringUtils.isBlank(regexStr)) {
throw new IllegalArgumentException("regex must not be empty"); throw new IllegalArgumentException("regex must not be empty");
} }
if (!StringUtils.contains(regexStr, "(") && !StringUtils.contains(regexStr, ")")) { /* Can't detect '\(' or '(?:)', so this would result in an ArrayIndexOutOfBoundsException
regexStr = "(" + regexStr + ")"; if (!StringUtils.contains(regexStr, "(") && !StringUtils.contains(regexStr, ")")) {
} regexStr = "(" + regexStr + ")";
if (!StringUtils.contains(regexStr, "(") || !StringUtils.contains(regexStr, ")")) { }
throw new IllegalArgumentException("regex must have capture group 1"); if (!StringUtils.contains(regexStr, "(") || !StringUtils.contains(regexStr, ")")) {
} throw new IllegalArgumentException("regex must have capture group 1");
this.regexStr = regexStr; }
try { */
regex = Pattern.compile(regexStr, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) { // Attempted fix: only check whether a valid left parenthesis exists; leave regexp validation to Pattern
throw new IllegalArgumentException("invalid regex", e); if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\\\\\(") ==
} StringUtils.countMatches(regexStr, "(?:") - StringUtils.countMatches(regexStr, "\\\\\\(?:")) {
this.group = group; regexStr = "(" + regexStr + ")";
} }
public RegexSelector(String regexStr) { this.regexStr = regexStr;
this(regexStr, 1); try {
} regex = Pattern.compile(regexStr, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
@Override throw new IllegalArgumentException("invalid regex", e);
public String select(String text) { }
return selectGroup(text).get(group); this.group = group;
} }
@Override public RegexSelector(String regexStr) {
public List<String> selectList(String text) { this(regexStr, 1);
List<String> strings = new ArrayList<String>(); }
List<RegexResult> results = selectGroupList(text);
for (RegexResult result : results) { @Override
strings.add(result.get(group)); public String select(String text) {
} return selectGroup(text).get(group);
return strings; }
}
@Override
public RegexResult selectGroup(String text) { public List<String> selectList(String text) {
Matcher matcher = regex.matcher(text); List<String> strings = new ArrayList<String>();
if (matcher.find()) { List<RegexResult> results = selectGroupList(text);
String[] groups = new String[matcher.groupCount() + 1]; for (RegexResult result : results) {
for (int i = 0; i < groups.length; i++) { strings.add(result.get(group));
groups[i] = matcher.group(i); }
} return strings;
return new RegexResult(groups); }
}
return RegexResult.EMPTY_RESULT; public RegexResult selectGroup(String text) {
} Matcher matcher = regex.matcher(text);
if (matcher.find()) {
public List<RegexResult> selectGroupList(String text) { String[] groups = new String[matcher.groupCount() + 1];
Matcher matcher = regex.matcher(text); for (int i = 0; i < groups.length; i++) {
List<RegexResult> resultList = new ArrayList<RegexResult>(); groups[i] = matcher.group(i);
while (matcher.find()) { }
String[] groups = new String[matcher.groupCount() + 1]; return new RegexResult(groups);
for (int i = 0; i < groups.length; i++) { }
groups[i] = matcher.group(i); return RegexResult.EMPTY_RESULT;
} }
resultList.add(new RegexResult(groups));
} public List<RegexResult> selectGroupList(String text) {
return resultList; Matcher matcher = regex.matcher(text);
} List<RegexResult> resultList = new ArrayList<RegexResult>();
while (matcher.find()) {
@Override String[] groups = new String[matcher.groupCount() + 1];
public String toString() { for (int i = 0; i < groups.length; i++) {
return regexStr; groups[i] = matcher.group(i);
} }
resultList.add(new RegexResult(groups));
} }
return resultList;
}
@Override
public String toString() {
return regexStr;
}
}