Request再次重构:去掉params,仅保留HttpRequestBody
commit
fe95a6842f
|
@ -1,13 +1,16 @@
|
||||||
package us.codecraft.webmagic;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.http.Header;
|
||||||
|
|
||||||
import us.codecraft.webmagic.selector.Html;
|
import us.codecraft.webmagic.selector.Html;
|
||||||
import us.codecraft.webmagic.selector.Json;
|
import us.codecraft.webmagic.selector.Json;
|
||||||
import us.codecraft.webmagic.selector.Selectable;
|
import us.codecraft.webmagic.selector.Selectable;
|
||||||
import us.codecraft.webmagic.utils.UrlUtils;
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,7 +49,7 @@ public class Page {
|
||||||
private boolean needCycleRetry;
|
private boolean needCycleRetry;
|
||||||
|
|
||||||
private List<Request> targetRequests = new ArrayList<Request>();
|
private List<Request> targetRequests = new ArrayList<Request>();
|
||||||
|
|
||||||
public Page() {
|
public Page() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -232,6 +235,11 @@ public class Page {
|
||||||
", statusCode=" + statusCode +
|
", statusCode=" + statusCode +
|
||||||
", needCycleRetry=" + needCycleRetry +
|
", needCycleRetry=" + needCycleRetry +
|
||||||
", targetRequests=" + targetRequests +
|
", targetRequests=" + targetRequests +
|
||||||
|
", headers=" + headers+
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,14 @@
|
||||||
package us.codecraft.webmagic;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
|
import org.apache.http.Header;
|
||||||
|
import org.apache.http.cookie.Cookie;
|
||||||
|
import us.codecraft.webmagic.model.HttpRequestBody;
|
||||||
import us.codecraft.webmagic.utils.Experimental;
|
import us.codecraft.webmagic.utils.Experimental;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,14 +28,19 @@ public class Request implements Serializable {
|
||||||
|
|
||||||
private String method;
|
private String method;
|
||||||
|
|
||||||
|
private HttpRequestBody requestBody;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store additional information in extras.
|
* Store additional information in extras.
|
||||||
*/
|
*/
|
||||||
private Map<String, Object> extras;
|
private Map<String, Object> extras;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* POST/GET param set
|
* cookies for current url, if not set use Site's cookies
|
||||||
* */
|
*/
|
||||||
private Map<String,String> params=new HashMap<String, String>();
|
private List<Cookie> cookies=new ArrayList<Cookie>();
|
||||||
|
|
||||||
|
private List<Header> headers=new ArrayList<Header>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Priority of the request.<br>
|
* Priority of the request.<br>
|
||||||
|
@ -109,57 +119,38 @@ public class Request implements Serializable {
|
||||||
this.method = method;
|
this.method = method;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, String> getParams() {
|
|
||||||
return params;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* set params for request
|
|
||||||
* <br>
|
|
||||||
* DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic'
|
|
||||||
* @param params params
|
|
||||||
* */
|
|
||||||
public void setParams(Map<String, String> params) {
|
|
||||||
this.params = params;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* set params for request
|
|
||||||
* <br>
|
|
||||||
* DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic'
|
|
||||||
* @param key key
|
|
||||||
* @param value value
|
|
||||||
* */
|
|
||||||
public void putParams(String key,String value) {
|
|
||||||
params.put(key,value);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (this == o) return true;
|
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
|
||||||
|
|
||||||
Request request = (Request) o;
|
|
||||||
|
|
||||||
if (url != null ? !url.equals(request.url) : request.url != null) return false;
|
|
||||||
if (method != null ? !method.equals(request.method) : request.method != null) return false;
|
|
||||||
return params != null ? params.equals(request.params) : request.params == null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int result = url != null ? url.hashCode() : 0;
|
int result = url != null ? url.hashCode() : 0;
|
||||||
result = 31 * result + (method != null ? method.hashCode() : 0);
|
result = 31 * result + (method != null ? method.hashCode() : 0);
|
||||||
result = 31 * result + (params != null ? params.hashCode() : 0);
|
result = 31 * result + (headers != null ? headers.hashCode() : 0);
|
||||||
|
result = 31 * result + (cookies != null ? cookies.hashCode() : 0);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Cookie> getCookies() {
|
||||||
|
return cookies;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Header> getHeaders() {
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpRequestBody getRequestBody() {
|
||||||
|
return requestBody;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Request{" +
|
return "Request{" +
|
||||||
"url='" + url + '\'' +
|
"url='" + url + '\'' +
|
||||||
", method='" + method + '\'' +
|
", method='" + method + '\'' +
|
||||||
", extras=" + extras +
|
", extras=" + extras +
|
||||||
", params=" + params +
|
|
||||||
", priority=" + priority +
|
", priority=" + priority +
|
||||||
|
", headers=" + headers +
|
||||||
|
", cookies="+ cookies+
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,26 @@
|
||||||
package us.codecraft.webmagic.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.http.Header;
|
||||||
|
import org.apache.http.HttpHost;
|
||||||
import org.apache.http.HttpResponse;
|
import org.apache.http.HttpResponse;
|
||||||
import org.apache.http.annotation.ThreadSafe;
|
import org.apache.http.annotation.ThreadSafe;
|
||||||
import org.apache.http.auth.AuthState;
|
import org.apache.http.auth.AuthState;
|
||||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||||
|
import org.apache.http.client.CookieStore;
|
||||||
|
import org.apache.http.client.config.CookieSpecs;
|
||||||
|
import org.apache.http.client.config.RequestConfig;
|
||||||
|
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
import org.apache.http.client.methods.HttpUriRequest;
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
|
import org.apache.http.client.methods.RequestBuilder;
|
||||||
import org.apache.http.client.protocol.HttpClientContext;
|
import org.apache.http.client.protocol.HttpClientContext;
|
||||||
|
import org.apache.http.cookie.Cookie;
|
||||||
import org.apache.http.impl.auth.BasicScheme;
|
import org.apache.http.impl.auth.BasicScheme;
|
||||||
|
import org.apache.http.impl.client.BasicCookieStore;
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.message.BasicNameValuePair;
|
||||||
import org.apache.http.protocol.BasicHttpContext;
|
import org.apache.http.protocol.BasicHttpContext;
|
||||||
import org.apache.http.protocol.HttpContext;
|
import org.apache.http.protocol.HttpContext;
|
||||||
import org.apache.http.util.EntityUtils;
|
import org.apache.http.util.EntityUtils;
|
||||||
|
@ -24,11 +35,11 @@ import us.codecraft.webmagic.proxy.ProxyProvider;
|
||||||
import us.codecraft.webmagic.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
import us.codecraft.webmagic.utils.CharsetUtils;
|
import us.codecraft.webmagic.utils.CharsetUtils;
|
||||||
import us.codecraft.webmagic.utils.HttpClientUtils;
|
import us.codecraft.webmagic.utils.HttpClientUtils;
|
||||||
|
import us.codecraft.webmagic.utils.HttpConstant;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -88,7 +99,7 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
int statusCode = 0;
|
int statusCode = 0;
|
||||||
Site site = task.getSite();
|
Site site = task.getSite();
|
||||||
Proxy proxy = null;
|
Proxy proxy = null;
|
||||||
HttpContext httpContext = new BasicHttpContext();
|
HttpClientContext httpContext = new HttpClientContext();
|
||||||
if (proxyProvider != null) {
|
if (proxyProvider != null) {
|
||||||
proxy = proxyProvider.getProxy(task);
|
proxy = proxyProvider.getProxy(task);
|
||||||
AuthState authState = new AuthState();
|
AuthState authState = new AuthState();
|
||||||
|
@ -97,6 +108,18 @@ public class HttpClientDownloader extends AbstractDownloader {
|
||||||
}
|
}
|
||||||
CloseableHttpClient httpClient = getHttpClient(site);
|
CloseableHttpClient httpClient = getHttpClient(site);
|
||||||
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy);
|
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy);
|
||||||
|
if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) {
|
||||||
|
CookieStore cookieStore = new BasicCookieStore();
|
||||||
|
for (Cookie c : request.getCookies()) {
|
||||||
|
cookieStore.addCookie(c);
|
||||||
|
}
|
||||||
|
httpContext.setCookieStore(cookieStore);
|
||||||
|
}
|
||||||
|
if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) {
|
||||||
|
for (Header h : request.getHeaders()) {
|
||||||
|
httpUriRequest.setHeader(h);
|
||||||
|
}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
httpResponse = httpClient.execute(httpUriRequest, httpContext);
|
httpResponse = httpClient.execute(httpUriRequest, httpContext);
|
||||||
statusCode = httpResponse.getStatusLine().getStatusCode();
|
statusCode = httpResponse.getStatusLine().getStatusCode();
|
||||||
|
|
|
@ -1,22 +1,16 @@
|
||||||
package us.codecraft.webmagic.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import org.apache.http.HttpHost;
|
import org.apache.http.HttpHost;
|
||||||
import org.apache.http.NameValuePair;
|
|
||||||
import org.apache.http.client.config.CookieSpecs;
|
import org.apache.http.client.config.CookieSpecs;
|
||||||
import org.apache.http.client.config.RequestConfig;
|
import org.apache.http.client.config.RequestConfig;
|
||||||
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
|
||||||
import org.apache.http.client.methods.HttpUriRequest;
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
import org.apache.http.client.methods.RequestBuilder;
|
import org.apache.http.client.methods.RequestBuilder;
|
||||||
import org.apache.http.message.BasicNameValuePair;
|
import org.apache.http.entity.ByteArrayEntity;
|
||||||
import us.codecraft.webmagic.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.webmagic.proxy.Proxy;
|
import us.codecraft.webmagic.proxy.Proxy;
|
||||||
import us.codecraft.webmagic.utils.HttpConstant;
|
import us.codecraft.webmagic.utils.HttpConstant;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -53,32 +47,27 @@ public class HttpUriRequestConverter {
|
||||||
String method = request.getMethod();
|
String method = request.getMethod();
|
||||||
if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
|
if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
|
||||||
//default get
|
//default get
|
||||||
return addQueryParams(RequestBuilder.get(),request.getParams());
|
return RequestBuilder.get();
|
||||||
} else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
|
} else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
|
||||||
return addFormParams(RequestBuilder.post(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
|
return addFormParams(RequestBuilder.post(),request);
|
||||||
} else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
|
} else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
|
||||||
return addQueryParams(RequestBuilder.head(),request.getParams());
|
return RequestBuilder.head();
|
||||||
} else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
|
} else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
|
||||||
return addFormParams(RequestBuilder.put(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
|
return addFormParams(RequestBuilder.put(), request);
|
||||||
} else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
|
} else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
|
||||||
return addQueryParams(RequestBuilder.delete(),request.getParams());
|
return RequestBuilder.delete();
|
||||||
} else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
|
} else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
|
||||||
return addQueryParams(RequestBuilder.trace(),request.getParams());
|
return RequestBuilder.trace();
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("Illegal HTTP Method " + method);
|
throw new IllegalArgumentException("Illegal HTTP Method " + method);
|
||||||
}
|
}
|
||||||
|
|
||||||
private RequestBuilder addFormParams(RequestBuilder requestBuilder, NameValuePair[] nameValuePair, Map<String, String> params) {
|
private RequestBuilder addFormParams(RequestBuilder requestBuilder, Request request) {
|
||||||
List<NameValuePair> allNameValuePair=new ArrayList<NameValuePair>();
|
if (request.getRequestBody() != null) {
|
||||||
if (nameValuePair != null && nameValuePair.length > 0) {
|
ByteArrayEntity entity = new ByteArrayEntity(request.getRequestBody().getBody());
|
||||||
allNameValuePair= Arrays.asList(nameValuePair);
|
entity.setContentType(request.getRequestBody().getContentType());
|
||||||
|
requestBuilder.setEntity(entity);
|
||||||
}
|
}
|
||||||
if (params != null) {
|
|
||||||
for (String key : params.keySet()) {
|
|
||||||
allNameValuePair.add(new BasicNameValuePair(key, params.get(key)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
requestBuilder.setEntity(new UrlEncodedFormEntity(allNameValuePair, Charset.forName("utf8")));
|
|
||||||
return requestBuilder;
|
return requestBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
package us.codecraft.webmagic.model;
|
||||||
|
|
||||||
|
import org.apache.http.NameValuePair;
|
||||||
|
import org.apache.http.client.utils.URLEncodedUtils;
|
||||||
|
import org.apache.http.message.BasicNameValuePair;
|
||||||
|
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
* Date: 17/4/8
|
||||||
|
*/
|
||||||
|
public class HttpRequestBody {
|
||||||
|
|
||||||
|
public static abstract class ContentType {
|
||||||
|
|
||||||
|
public static final String JSON = "application/json";
|
||||||
|
|
||||||
|
public static final String XML = "text/xml";
|
||||||
|
|
||||||
|
public static final String FORM = "application/x-www-form-urlencoded";
|
||||||
|
|
||||||
|
public static final String MULTIPART = "multipart/form-data";
|
||||||
|
}
|
||||||
|
|
||||||
|
private final byte[] body;
|
||||||
|
|
||||||
|
private final String contentType;
|
||||||
|
|
||||||
|
private final String encoding;
|
||||||
|
|
||||||
|
public HttpRequestBody(byte[] body, String contentType, String encoding) {
|
||||||
|
this.body = body;
|
||||||
|
this.contentType = contentType;
|
||||||
|
this.encoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContentType() {
|
||||||
|
return contentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEncoding() {
|
||||||
|
return encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HttpRequestBody json(String json, String encoding) throws UnsupportedEncodingException {
|
||||||
|
return new HttpRequestBody(json.getBytes(encoding), ContentType.JSON, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HttpRequestBody xml(String xml, String encoding) throws UnsupportedEncodingException {
|
||||||
|
return new HttpRequestBody(xml.getBytes(encoding), ContentType.XML, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HttpRequestBody custom(byte[] body, String contentType, String encoding) throws UnsupportedEncodingException {
|
||||||
|
return new HttpRequestBody(body, contentType, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HttpRequestBody form(Map<String,Object> params, String encoding) throws UnsupportedEncodingException {
|
||||||
|
List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(params.size());
|
||||||
|
for (Map.Entry<String, Object> entry : params.entrySet()) {
|
||||||
|
nameValuePairs.add(new BasicNameValuePair(entry.getKey(), String.valueOf(entry.getValue())));
|
||||||
|
}
|
||||||
|
return new HttpRequestBody(URLEncodedUtils.format(nameValuePairs, encoding).getBytes(encoding), ContentType.FORM, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getBody() {
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,7 +26,7 @@ public abstract class CharsetUtils {
|
||||||
// charset
|
// charset
|
||||||
// 1、encoding in http header Content-Type
|
// 1、encoding in http header Content-Type
|
||||||
charset = UrlUtils.getCharset(contentType);
|
charset = UrlUtils.getCharset(contentType);
|
||||||
if (StringUtils.isNotBlank(contentType)) {
|
if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
|
||||||
logger.debug("Auto get charset: {}", charset);
|
logger.debug("Auto get charset: {}", charset);
|
||||||
return charset;
|
return charset;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue