From 5daf92e8b294fbf75169050b98db6a12a06acdf4 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 22 Jul 2017 17:27:36 +0800 Subject: [PATCH] #610 CASE_INSENSITIVE for charset detect in Content-Type --- .../src/main/java/us/codecraft/webmagic/utils/UrlUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java index 6864606..87a6a56 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java @@ -108,7 +108,7 @@ public class UrlUtils { return urlList; } - private static final Pattern patternForCharset = Pattern.compile("charset\\s*=\\s*['\"]*([^\\s;'\"]*)"); + private static final Pattern patternForCharset = Pattern.compile("charset\\s*=\\s*['\"]*([^\\s;'\"]*)", Pattern.CASE_INSENSITIVE); public static String getCharset(String contentType) { Matcher matcher = patternForCharset.matcher(contentType);