diff --git a/asserts/webmagic-create-spider.bmml b/asserts/webmagic-create-spider.bmml new file mode 100644 index 0000000..7617042 --- /dev/null +++ b/asserts/webmagic-create-spider.bmml @@ -0,0 +1,440 @@ + + + + + Create%20Spider%0Ahttp%3A//localhost%3A8080/spider/create + + + + + + + true + Custom%20PageProcessor%20 + + + + + + + true + BlogSpider + + + + + true + SpiderTemplate + + + + + true + New%20Template + + + + + true + Title + + + + + true + //title/text%28%29 + + + + + true + Content + + + + + true + //div%5B@class%3D%27BlogContent%27%5D/text%28%29 + + + + + true + Date + + + + + true + //div%5B@class%3D%27BlogStat%27%5D/regex%28%27%5Cd+-%5Cd+-%5Cd+%5Cs+%5Cd+%3A%5Cd+%27%29 + + + + + true + Tags + + + + + true + //div%5B@class%3D%27tags%27%5D/a/text%28%29 + + + + + + + + + + + Create%20Spider + + + + + Name + + + + + blog.oschina.net + + + + + StartUrls + + + + + http%3A//my.oschina.net/flashsword/blog/180623 + + + + + Other%20Source + + + + + up + Advanced%20Setting + + + + + + + + + URL%20manangement + + + + + Scheduler + + + + + Host + + + + + Redis + + + + + 127.0.0.1 + + + + + 6379 + + + + + + + true + New%20Scheduler + + + + + + + Persistent + + + + + Pipeline + + + + + Path + + + + + Local%20File + + + + + /data/webmaigc/%7BspdierName%7D + + + + + + + true + New%20Pipeline + + + + + Create + + + + + Cancel + + + + + + + + + Advanced%20Setting + + + + + + + Headers + + + + + true + User%20Agent + + + + + true + Cookie + + + + + true + Mozilla/5.0%20%28compatible%3B%20MSIE%2010.0... + + + + + true + id + + + + + Add + + + + + true + name + + + + + true + value + + + + + Add + + + + + Add + + + + + true + 123456 + + + + + + + + + true + Proxy + + + + + true + 127.0.0.1 + + + + + true + 8080 + + + + + true + username + + + + + true + password + + + + + + + + + true + Charset + + + + + true + utf-8 + + + + + AutoDetect + + + + + + + + + true + Frenquecny + + + + + true + 3000 + + + + + true + Sleep + + + + + true + milliseconds%20after%20download%20one%20page + + + + + + + + + + + Error%20Handle + + + + + + + true + Retry + + + + + true + 3 + + + + + true + Retry + + + + + true + times%20when%20downloading%20a%20page + + + + + true + If%20it%20still%20fails%20in%20downloading%2C%20re-insert%20it%20to%20url%20queue.%5Cr%5CrAfter%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20times%2C%20the%20url%20will%20be%20discarded. + + + + + true + 3 + + + + + + + + + + + Test + + + + + import + + + + + export + + + + \ No newline at end of file diff --git a/asserts/webmagic-create-spider.png b/asserts/webmagic-create-spider.png new file mode 100644 index 0000000..8fe92c4 Binary files /dev/null and b/asserts/webmagic-create-spider.png differ diff --git a/asserts/webmagic-spider-manage.bmml b/asserts/webmagic-spider-manage.bmml new file mode 100644 index 0000000..1423b01 --- /dev/null +++ b/asserts/webmagic-spider-manage.bmml @@ -0,0 +1,110 @@ + + + + + Spider%20List%20Page%0Ahttp%3A//localhost%3A8080/spider/list + + + + + Spider%20%2C%20Add%20Time%20%5Ev%2CPages%20Total%20%5Ev%2C%20Pages%20Downloaded%20%5Ev%2C%20Error%20%5Ev%2C%20%20Operation%0Agithub.com%2C%202014-3-1.12%3A20%3A10%2C1221%2C%20595%2C%204%2C%20Stop%20Edit%20Delete%0Aoschina.net%2C2014-2-12.16%3A10%3A20%2C120%2C%20%20120%2C%200%2C%20Start%20Edit%20Delete%0Aappstore.com%2C2014-2-10.9%3A20%3A10%2C100000%2C100000%2C%200%2CStart%20Edit%20Delete + + + + + Works + + + + + + + selected + 10.1.2.1 + + + + + 10.1.2.2 + + + + + selected + 10.1.2.3 + + + + + 10.1.2.4 + + + + + all + + + + + + + + + + Real%20Time + + + + + Pages + + + + + Time + + + + + + + Keyword + + + + + Search + + + + + Spiders + + + + + Charts + + + + + 2014-2-1 + + + + + 2014-3-1 + + + + + Time%20from + + + + + to + + + + \ No newline at end of file diff --git a/asserts/webmagic-spider-manage.png b/asserts/webmagic-spider-manage.png new file mode 100644 index 0000000..8fbdb6a Binary files /dev/null and b/asserts/webmagic-spider-manage.png differ