#629 correct illegal url in HttpUriRequestConverter
parent
5daf92e8b2
commit
3266ea15ca
|
@ -58,7 +58,7 @@ public class HttpUriRequestConverter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
|
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
|
||||||
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
|
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
|
||||||
if (site.getHeaders() != null) {
|
if (site.getHeaders() != null) {
|
||||||
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
|
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
|
||||||
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
|
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
|
||||||
|
|
|
@ -43,7 +43,7 @@ public class UrlUtils {
|
||||||
if (url.startsWith("?"))
|
if (url.startsWith("?"))
|
||||||
url = base.getPath() + url;
|
url = base.getPath() + url;
|
||||||
URL abs = new URL(base, url);
|
URL abs = new URL(base, url);
|
||||||
return encodeIllegalCharacterInUrl(abs.toExternalForm());
|
return abs.toExternalForm();
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@ -53,12 +53,17 @@ public class UrlUtils {
|
||||||
*
|
*
|
||||||
* @param url url
|
* @param url url
|
||||||
* @return new url
|
* @return new url
|
||||||
|
* @deprecated use {@link #fixIllegalCharacterInUrl(String)} instead
|
||||||
*/
|
*/
|
||||||
public static String encodeIllegalCharacterInUrl(String url) {
|
public static String encodeIllegalCharacterInUrl(String url) {
|
||||||
//TODO more character support
|
|
||||||
return url.replace(" ", "%20");
|
return url.replace(" ", "%20");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String fixIllegalCharacterInUrl(String url) {
|
||||||
|
//TODO more charator support
|
||||||
|
return url.replace(" ", "%20").replaceAll("#+", "#");
|
||||||
|
}
|
||||||
|
|
||||||
public static String getHost(String url) {
|
public static String getHost(String url) {
|
||||||
String host = url;
|
String host = url;
|
||||||
int i = StringUtils.ordinalIndexOf(url, "/", 3);
|
int i = StringUtils.ordinalIndexOf(url, "/", 3);
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import us.codecraft.webmagic.Request;
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* Date: 2017/7/22
|
||||||
|
* Time: 下午5:29
|
||||||
|
*/
|
||||||
|
public class HttpUriRequestConverterTest {
|
||||||
|
|
||||||
|
@Test(expected = IllegalArgumentException.class)
|
||||||
|
public void test_illegal_uri() throws Exception {
|
||||||
|
HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
|
||||||
|
httpUriRequestConverter.convert(new Request("http://bj.zhongkao.com/beikao/yimo/##"), Site.me(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_illegal_uri_correct() throws Exception {
|
||||||
|
HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
|
||||||
|
HttpClientRequestContext requestContext = httpUriRequestConverter.convert(new Request(UrlUtils.fixIllegalCharacterInUrl("http://bj.zhongkao.com/beikao/yimo/##")), Site.me(), null);
|
||||||
|
assertThat(requestContext.getHttpUriRequest().getURI()).isEqualTo(new URI("http://bj.zhongkao.com/beikao/yimo/#"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue