Fix issue #21: charset detection error

master
yihua.huang 2013-09-04 15:27:51 +08:00
parent e1b6b54097
commit 692de76f86
1 changed files with 7 additions and 4 deletions

View File

@ -2,6 +2,7 @@ package us.codecraft.webmagic.utils;
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -98,15 +99,17 @@ public class UrlUtils {
return stringBuilder.toString();
}
/**
 * Locates the {@code charset} parameter inside a Content-Type value or an
 * HTML meta tag. Whitespace around {@code =} and optional single/double
 * quotes around the value are tolerated; the parameter name is matched
 * case-insensitively. Group 1 holds the raw charset name and may be empty.
 */
private static final Pattern patternForCharset =
        Pattern.compile("charset\\s*=\\s*['\"]*([^\\s;'\"]*)", Pattern.CASE_INSENSITIVE);

/**
 * Extracts the charset name declared in a content-type string.
 *
 * @param contentType a Content-Type header value or meta-tag text, e.g.
 *                    {@code text/html; charset="UTF-8"}; may be {@code null}
 * @return the charset name exactly as written in {@code contentType} when it
 *         names a charset supported by this JVM; {@code null} when the input
 *         is {@code null}, declares no charset, or declares a name that is
 *         empty, illegal or unsupported
 */
public static String getCharset(String contentType) {
    if (contentType == null) {
        return null;
    }
    Matcher matcher = patternForCharset.matcher(contentType);
    if (!matcher.find()) {
        return null;
    }
    String charset = matcher.group(1);
    try {
        return Charset.isSupported(charset) ? charset : null;
    } catch (IllegalArgumentException e) {
        // Charset.isSupported throws IllegalCharsetNameException (a subclass of
        // IllegalArgumentException) for empty or syntactically illegal names,
        // e.g. a bare "charset=". Treat those the same as "no usable charset".
        return null;
    }
}
}