#629 correct illegal url in HttpUriRequestConverter
parent
5daf92e8b2
commit
3266ea15ca
|
@ -58,7 +58,7 @@ public class HttpUriRequestConverter {
|
|||
}
|
||||
|
||||
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
|
||||
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
|
||||
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
|
||||
if (site.getHeaders() != null) {
|
||||
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
|
||||
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
|
||||
|
|
|
@ -43,7 +43,7 @@ public class UrlUtils {
|
|||
if (url.startsWith("?"))
|
||||
url = base.getPath() + url;
|
||||
URL abs = new URL(base, url);
|
||||
return encodeIllegalCharacterInUrl(abs.toExternalForm());
|
||||
return abs.toExternalForm();
|
||||
} catch (MalformedURLException e) {
|
||||
return "";
|
||||
}
|
||||
|
@ -53,12 +53,17 @@ public class UrlUtils {
|
|||
*
|
||||
* @param url url
|
||||
* @return new url
|
||||
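* @deprecated use {@link #fixIllegalCharacterInUrl(String)} instead, which also collapses repeated '#' characters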
|
||||
*/
|
||||
public static String encodeIllegalCharacterInUrl(String url) {
|
||||
//TODO more charator support
|
||||
return url.replace(" ", "%20");
|
||||
}
|
||||
|
||||
public static String fixIllegalCharacterInUrl(String url) {
|
||||
//TODO more charator support
|
||||
return url.replace(" ", "%20").replaceAll("#+", "#");
|
||||
}
|
||||
|
||||
public static String getHost(String url) {
|
||||
String host = url;
|
||||
int i = StringUtils.ordinalIndexOf(url, "/", 3);
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package us.codecraft.webmagic.downloader;
|
||||
|
||||
import org.junit.Test;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com
|
||||
* Date: 2017/7/22
|
||||
* Time: 5:29 PM
|
||||
*/
|
||||
public class HttpUriRequestConverterTest {
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void test_illegal_uri() throws Exception {
|
||||
HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
|
||||
httpUriRequestConverter.convert(new Request("http://bj.zhongkao.com/beikao/yimo/##"), Site.me(), null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_illegal_uri_correct() throws Exception {
|
||||
HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
|
||||
HttpClientRequestContext requestContext = httpUriRequestConverter.convert(new Request(UrlUtils.fixIllegalCharacterInUrl("http://bj.zhongkao.com/beikao/yimo/##")), Site.me(), null);
|
||||
assertThat(requestContext.getHttpUriRequest().getURI()).isEqualTo(new URI("http://bj.zhongkao.com/beikao/yimo/#"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue