From d2aebc60a7cb72fbd8107c844983e24543e106e4 Mon Sep 17 00:00:00 2001 From: Joe Zhou Date: Tue, 4 Jun 2024 00:57:28 +0800 Subject: [PATCH] Make getCharset support a null parameter. --- .../src/main/java/us/codecraft/webmagic/utils/UrlUtils.java | 4 ++++ .../test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java index c61483a..ea317c4 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java @@ -116,6 +116,10 @@ public class UrlUtils { private static final Pattern patternForCharset = Pattern.compile("charset\\s*=\\s*['\"]*([^\\s;'\"]*)", Pattern.CASE_INSENSITIVE); public static String getCharset(String contentType) { + if (contentType == null) { + return null; + } + Matcher matcher = patternForCharset.matcher(contentType); if (matcher.find()) { String charset = matcher.group(1); diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java index 6afdeef..38c8295 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java @@ -1,5 +1,7 @@ package us.codecraft.webmagic.utils; +import static org.junit.Assert.assertNull; + import org.junit.Assert; import org.junit.Test; @@ -43,5 +45,9 @@ public class UrlUtilsTest { Assert.assertEquals("www.dianping.com",UrlUtils.getDomain(url)); } + @Test + public void testGetCharset() { + assertNull(UrlUtils.getCharset(null)); + } }