diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index 0817335..a84ba48 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -8,8 +8,8 @@ import java.util.*;
* Object contains setting for crawler.
*
* @author code4crafter@gmail.com
- * @since 0.1.0
* @see us.codecraft.webmagic.processor.PageProcessor
+ * @since 0.1.0
*/
public class Site {
@@ -38,6 +38,14 @@ public class Site {
private Set acceptStatCode = DEFAULT_STATUS_CODE_SET;
+ private Map headers = new HashMap();
+
+ public static interface HeaderConst { // names of http headers, usable as the key of addHeader(String, String)
+
+ public static final String REFERER = "Referer"; // name of the Referer header
+ }
+
+
static {
DEFAULT_STATUS_CODE_SET.add(200);
}
@@ -139,10 +147,12 @@ public class Site {
/**
* set timeout for downloader in ms
+ *
* @param timeOut
+ * @return this
*/
- public void setTimeOut(int timeOut) {
+ public Site setTimeOut(int timeOut) {
this.timeOut = timeOut;
+ return this;
}
/**
@@ -216,7 +226,7 @@ public class Site {
}
/**
- * Get retry times when download fail immediately, 0 by default.
+ * Get the number of immediate retries when a download fails, 0 by default.
*
* @return retry times when download fail
*/
@@ -224,6 +234,22 @@ public class Site {
return retryTimes;
}
+ public Map getHeaders() { // http headers added through addHeader(String, String)
+ return headers; // the map held by this Site itself, not a copy
+ }
+
+ /**
+ * Put an Http header for downloader. Putting the same key again replaces the earlier value.
+ * Use {@link #addCookie(String, String)} for cookie and {@link #setUserAgent(String)} for user-agent.
+ *
+ * @param key key of http header, some keys are defined as constants in {@link HeaderConst}
+ * @param value value of header
+ * @return this
+ */
+ public Site addHeader(String key, String value){
+ headers.put(key,value);
+ return this;
+ }
+
/**
* Set retry times when download fail, 0 by default.
*
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 82a4a9a..b6f0034 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -19,6 +19,7 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.IOException;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
@@ -66,10 +67,12 @@ public class HttpClientDownloader implements Downloader {
int retryTimes = 0;
Set acceptStatCode;
String charset = null;
+ Map headers = null;
if (site != null) {
retryTimes = site.getRetryTimes();
acceptStatCode = site.getAcceptStatCode();
charset = site.getCharset();
+ headers = site.getHeaders();
} else {
acceptStatCode = new HashSet();
acceptStatCode.add(200);
@@ -78,6 +81,11 @@ public class HttpClientDownloader implements Downloader {
HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site);
try {
HttpGet httpGet = new HttpGet(request.getUrl());
+ if (headers!=null){
+ for (Map.Entry headerEntry : headers.entrySet()) {
+ httpGet.addHeader(headerEntry.getKey(),headerEntry.getValue());
+ }
+ }
HttpResponse httpResponse = null;
int tried = 0;
boolean retry;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java
index 52e2f99..c256ac4 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java
@@ -54,7 +54,7 @@ public class HttpClientPool {
}
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, site.getTimeOut());
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, site.getTimeOut());
-
+ params.setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
paramsBean.setVersion(HttpVersion.HTTP_1_1);
if (site != null && site.getCharset() != null) {
@@ -73,8 +73,7 @@ public class HttpClientPool {
if (site != null) {
generateCookie(httpClient, site);
}
- httpClient.getParams().setIntParameter("http.socket.timeout", 60000);
- httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
+
return httpClient;
}