yihua.huang
|
6201fd6966
|
add worker as container
|
2014-03-17 23:01:58 +08:00 |
yihua.huang
|
6c11718566
|
Clean project structure #70
|
2014-03-14 23:24:38 +08:00 |
yihua.huang
|
9606a173cd
|
fix ZipCodePageProcessor
|
2014-03-13 22:55:50 +08:00 |
yihua.huang
|
757cc9b942
|
[maven-release-plugin] prepare for next development iteration
|
2014-03-13 07:49:51 +08:00 |
yihua.huang
|
63ffb5c792
|
[maven-release-plugin] prepare release webmaigc-0.4.3
|
2014-03-13 07:49:27 +08:00 |
yihua.huang
|
66d4d3c192
|
Merge branch 'master' into 0.4.x
|
2014-03-13 07:12:29 +08:00 |
yihua.huang
|
af07280176
|
remove defend code for httpclient 4.3.1 because it is fixed in 4.3.3 #59
|
2014-03-13 07:11:56 +08:00 |
yihua.huang
|
d5a978e00f
|
update version back to 0.4.3
|
2014-03-13 06:55:05 +08:00 |
yihua.huang
|
55368919df
|
add attribute 'text' support for CssSelector #66
|
2014-03-11 13:18:34 +08:00 |
yihua.huang
|
88b50d4182
|
bigfix: cycleTry will not work when spawnUrl is set to false #62
|
2014-03-04 07:33:07 +08:00 |
yihua.huang
|
2768a1cae4
|
add test for cycleTriedTimes and fix cycleTriedTimes inc error #60
|
2014-03-01 15:10:38 +08:00 |
yihua.huang
|
bbd0d7e600
|
update httpclient version to 4.3.3 #59
|
2014-02-28 21:17:02 +08:00 |
yihua.huang
|
571061454a
|
#58 add CYCLE_TRIED_TIMES support to QueueScheduler and PriorityScheduler
|
2014-02-27 23:54:30 +08:00 |
yihua.huang
|
0e98183f74
|
Change log4j to slf4j #55
|
2014-02-12 09:35:57 +08:00 |
yihua.huang
|
fa33b15843
|
property loader
|
2014-02-11 23:07:31 +08:00 |
yihua.huang
|
af809c4d55
|
update version to 0.5.0-snapshot
|
2014-02-11 22:16:01 +08:00 |
Almark Ming
|
2b46b11e55
|
Update RegexSelector.java
Optimize regex format check
Conflicts:
webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
|
2013-12-21 08:38:17 +08:00 |
yihua.huang
|
b51fb2696b
|
update ut for cookie
|
2013-12-06 00:30:01 +08:00 |
yihua.huang
|
ff2f588c41
|
#48 nullpointer exception
|
2013-12-04 22:11:20 +08:00 |
yihua.huang
|
d274310cb2
|
[maven-release-plugin] prepare for next development iteration
|
2013-12-03 23:35:06 +08:00 |
yihua.huang
|
e8c32a32dc
|
[maven-release-plugin] prepare release webmagic-0.4.2
|
2013-12-03 23:34:57 +08:00 |
yihua.huang
|
6a828e923c
|
#46 Downloader thread hang up when timeout
|
2013-12-03 09:59:54 +08:00 |
shijinping
|
9a524aa364
|
double-check 中再取次httpClient的内容
|
2013-11-28 14:38:30 +08:00 |
yihua.huang
|
e7083dc39d
|
[maven-release-plugin] prepare for next development iteration
|
2013-11-28 13:04:32 +08:00 |
yihua.huang
|
ae623567b3
|
[maven-release-plugin] prepare release webmagic-0.4.1
|
2013-11-28 13:04:22 +08:00 |
yihua.huang
|
59ad4cad27
|
#42 Add jsonpath in annotation mode for json result
|
2013-11-28 08:25:16 +08:00 |
yihua.huang
|
c2d6d495b3
|
#41 add getThreadAlive(),getStatus,getPageCount() to spider
|
2013-11-28 07:59:24 +08:00 |
yihua.huang
|
cf62d707e0
|
#36 Spider does not exit when success
|
2013-11-27 23:33:18 +08:00 |
yihua.huang
|
a01312930a
|
#39 Parsing html after page.getHtml()
|
2013-11-27 22:01:34 +08:00 |
yihua.huang
|
f63d33b457
|
update some comments
|
2013-11-27 21:06:53 +08:00 |
yihua.huang
|
04fcf3193f
|
#38 Change algorithm of SmartContentSelector
|
2013-11-23 13:56:55 +08:00 |
yihua.huang
|
296a68920e
|
fix javadoc and add setPipelines() for spider
|
2013-11-14 13:23:29 +08:00 |
yihua.huang
|
47a0360783
|
#35 add status code to page
|
2013-11-12 11:51:34 +08:00 |
yihua.huang
|
bc5c30de17
|
update scripts
|
2013-11-12 08:20:59 +08:00 |
yihua.huang
|
f9daae39cf
|
[maven-release-plugin] prepare for next development iteration
|
2013-11-11 14:33:11 +08:00 |
yihua.huang
|
fdb9441519
|
[maven-release-plugin] prepare release webmagic-0.4.0
|
2013-11-11 14:33:01 +08:00 |
yihua.huang
|
1d75ae7f5b
|
rollback version to 0.4.0 because not deploy success
|
2013-11-11 11:52:56 +08:00 |
yihua.huang
|
df8ca8ad09
|
add scripts
|
2013-11-10 22:30:48 +08:00 |
yihua.huang
|
775eb9732f
|
[maven-release-plugin] prepare for next development iteration
|
2013-11-06 22:17:58 +08:00 |
yihua.huang
|
0b4fadc24d
|
[maven-release-plugin] prepare release webmagic-0.4.0
|
2013-11-06 22:17:47 +08:00 |
yihua.huang
|
fe6d9bb2e2
|
get keep-alive rework
|
2013-11-06 21:53:39 +08:00 |
yihua.huang
|
fd6d2fd6f8
|
try to keepalive TCP connection
|
2013-11-06 21:19:14 +08:00 |
yihua.huang
|
425df08523
|
update version to 0.4.0
|
2013-11-06 12:50:45 +08:00 |
yihua.huang
|
e046bb0723
|
remove useless code
|
2013-11-06 12:48:14 +08:00 |
yihua.huang
|
6e32a19f80
|
update api for direct download
|
2013-11-06 12:46:50 +08:00 |
yihua.huang
|
807aefe9df
|
change EntityUtil to IOUtil because some encoding error
|
2013-11-06 07:37:34 +08:00 |
yihua.huang
|
00b0a751b4
|
#33 ignore 'content-encoding' when redirect
|
2013-11-06 06:57:58 +08:00 |
yihua.huang
|
8f774afc84
|
add direct download
|
2013-11-06 06:41:04 +08:00 |
yihua.huang
|
c18b603399
|
optimize long compare
|
2013-11-04 07:09:44 +08:00 |
yihua.huang
|
ed3f3583cc
|
downloader refactor
|
2013-11-04 01:03:23 +08:00 |
yihua.huang
|
a37f40e6e6
|
add cookie supoort
|
2013-11-04 00:59:48 +08:00 |
yihua.huang
|
3c6fced48e
|
update connection client
|
2013-11-04 00:53:01 +08:00 |
yihua.huang
|
09153ff715
|
#22 http proxy support #32 update httpclient to 4.3.1
|
2013-11-04 00:47:09 +08:00 |
yihua.huang
|
edfc319c45
|
update httpclient to 4.3.1
|
2013-11-04 00:06:30 +08:00 |
yihua.huang
|
160a149b05
|
todo bugfix
|
2013-11-03 23:10:09 +08:00 |
yihua.huang
|
583a0eba8c
|
#29 refactor some method name
|
2013-11-03 20:24:26 +08:00 |
yihua.huang
|
6fa82a418b
|
#29 seed urls with more information
|
2013-11-03 20:20:50 +08:00 |
yihua.huang
|
1446ada732
|
some refactor
|
2013-10-31 22:50:22 +08:00 |
yihua.huang
|
84976c81ec
|
remove useless code
|
2013-10-31 22:48:18 +08:00 |
yihua.huang
|
b4fcf41168
|
add exit when comlete option
|
2013-10-31 22:41:02 +08:00 |
yihua.huang
|
352887870c
|
remove shutdown call
|
2013-10-31 22:22:14 +08:00 |
yihua.huang
|
a3f9ad198f
|
refactor multi thread code in Spider
|
2013-10-31 21:52:43 +08:00 |
yihua.huang
|
7fb44d2eec
|
#30 reuse PoolingClientConnectionManager for HttpClientDownloader
|
2013-10-14 23:22:04 +08:00 |
yihua.huang
|
5a226387e0
|
#27 nullpointer fix
|
2013-10-11 11:32:44 +08:00 |
yihua.huang
|
16e12e3bc9
|
#27 customize http header for downloader
|
2013-10-11 08:37:21 +08:00 |
yihua.huang
|
1a2c84ea78
|
#27 add timeout config to site
|
2013-10-11 07:36:16 +08:00 |
yihua.huang
|
4acbc19cee
|
[maven-release-plugin] prepare for next development iteration
|
2013-09-23 13:12:32 +08:00 |
yihua.huang
|
cc3b787991
|
[maven-release-plugin] prepare release webmagic-0.3.2
|
2013-09-23 13:12:19 +08:00 |
yihua.huang
|
b131878123
|
add example
|
2013-09-23 13:01:28 +08:00 |
yihua.huang
|
95ab4edec3
|
some bugfix
|
2013-09-23 08:38:54 +08:00 |
yihua.huang
|
fba330872b
|
fix a thread pool exception
|
2013-09-22 23:57:15 +08:00 |
yihua.huang
|
3c79d031bd
|
fix thread pool
|
2013-09-22 22:52:52 +08:00 |
yihua.huang
|
fb693a4ac4
|
[maven-release-plugin] prepare for next development iteration
|
2013-09-08 22:25:07 +08:00 |
yihua.huang
|
bfaaa042b9
|
[maven-release-plugin] prepare release webmagic-parent-0.3.1
|
2013-09-08 22:24:48 +08:00 |
yihua.huang
|
c17a31a21d
|
fix null pointe exception #26
|
2013-09-08 21:09:49 +08:00 |
yihua.huang
|
d2e0f0cd33
|
#25 use URL api in UrlUtils.canonicalizeUrl()
|
2013-09-06 21:35:23 +08:00 |
yihua.huang
|
ef4cf49fee
|
add stop method to spider #24
|
2013-09-06 21:17:36 +08:00 |
yihua.huang
|
692de76f86
|
fix issue #21 charset detect error
|
2013-09-04 15:27:51 +08:00 |
yihua.huang
|
e7bf425df4
|
[maven-release-plugin] prepare for next development iteration
|
2013-09-04 10:51:01 +08:00 |
yihua.huang
|
77ff252316
|
[maven-release-plugin] prepare release webmagic-0.3.0
|
2013-09-04 10:50:50 +08:00 |
yihua.huang
|
1fc8e104ab
|
add cycle retry
|
2013-09-04 10:32:13 +08:00 |
yihua.huang
|
d141541ef3
|
add retry
|
2013-09-04 09:57:19 +08:00 |
yihua.huang
|
a1ef2523cc
|
update xsoup version
|
2013-09-04 09:38:40 +08:00 |
yihua.huang
|
aefd0569a5
|
update version
|
2013-09-04 09:36:56 +08:00 |
yihua.huang
|
194518fd82
|
add switch
|
2013-09-04 08:21:34 +08:00 |
yihua.huang
|
326b97c65a
|
update
|
2013-09-04 00:15:54 +08:00 |
yihua.huang
|
2c3574537a
|
refactor in selectors
|
2013-09-02 14:14:24 +08:00 |
yihua.huang
|
85b7cf1563
|
complete test
|
2013-09-02 13:52:41 +08:00 |
yihua.huang
|
d7cd9e5747
|
update pom
|
2013-09-02 11:56:01 +08:00 |
yihua.huang
|
55d4a76ab7
|
newselectors
|
2013-09-02 08:21:32 +08:00 |
yihua.huang
|
d7abbd0e4b
|
fix compile error
|
2013-08-25 16:31:00 +08:00 |
yihua.huang
|
5e9e8b2541
|
add TextContentSelector
|
2013-08-25 16:30:38 +08:00 |
yihua.huang
|
0cc0ccee35
|
add charset specific for easy call of HttpClientDownloader
|
2013-08-25 15:41:43 +08:00 |
yihua.huang
|
91dcccf7b5
|
add a sample
|
2013-08-21 21:55:15 +08:00 |
yihua.huang
|
ad66d33f38
|
[maven-release-plugin] prepare for next development iteration
|
2013-08-20 23:39:59 +08:00 |
yihua.huang
|
9dc6b11954
|
[maven-release-plugin] prepare release webmagic-parent-0.2.1
|
2013-08-20 23:37:55 +08:00 |
yihua.huang
|
4f62dfc8a4
|
release
|
2013-08-20 23:37:20 +08:00 |
yihua.huang
|
74c940c758
|
[maven-release-plugin] prepare for next development iteration
|
2013-08-20 23:19:58 +08:00 |
yihua.huang
|
a4bb4e3429
|
[maven-release-plugin] prepare release webmagic-parent-0.2.1
|
2013-08-20 23:19:27 +08:00 |
yihua.huang
|
194f16aa75
|
update
|
2013-08-20 23:16:43 +08:00 |