diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index b126d9f..404a6dd 100755
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -9,6 +9,9 @@
us.codecraft
webmagic-scripts
+    <properties>
+        <kotlin.version>1.1.2-2</kotlin.version>
+    </properties>
@@ -16,6 +19,12 @@
jruby
1.7.6
+        <dependency>
+            <groupId>org.jetbrains.kotlin</groupId>
+            <artifactId>kotlin-stdlib</artifactId>
+            <version>${kotlin.version}</version>
+        </dependency>
+
org.codehaus.groovy
groovy-all
@@ -48,6 +57,7 @@
+ ${project.basedir}/src/main/kotlin,${project.basedir}/src/main/groovy,${project.basedir}/src/main/java
maven-compiler-plugin
diff --git a/webmagic-scripts/src/main/kotlin/Github.kt b/webmagic-scripts/src/main/kotlin/Github.kt
new file mode 100644
index 0000000..3d6ca21
--- /dev/null
+++ b/webmagic-scripts/src/main/kotlin/Github.kt
@@ -0,0 +1,40 @@
+
+import us.codecraft.webmagic.Page
+import us.codecraft.webmagic.Site
+import us.codecraft.webmagic.Spider
+import us.codecraft.webmagic.processor.PageProcessor
+import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
+
+/**
+ * Page processor that follows GitHub links and records the `author`, `name`
+ * and `readme` fields for each page it visits. Pages on which no repository
+ * name can be extracted are marked as skipped.
+ *
+ * @author code4crafter@gmail.com
+ * Date: 2017/5/31
+ * Time: 11:33 PM
+ */
+class GithubRepoPageProcessor : PageProcessor {
+
+    // Crawl settings handed to the spider through getSite().
+    private val site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000)
+
+    /**
+     * Queues further GitHub links found on [page] and stores the extracted fields.
+     *
+     * @param page the downloaded page to inspect
+     */
+    override fun process(page: Page) {
+        // Two-segment links: https://github.com/<first>/<second>.
+        page.addTargetRequests(page.html.links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all())
+        // Single-segment links. The `+` quantifier is required: without it the capture
+        // group keeps only one character after the host and a truncated URL is queued.
+        page.addTargetRequests(page.html.links().regex("(https://github\\.com/[\\w\\-]+)").all())
+
+        page.putField("author", page.url.regex("https://github\\.com/(\\w+)/.*").toString())
+        page.putField("name", page.html.xpath("//h1[@class='public']/strong/a/text()").toString())
+        if (page.resultItems.get<Any>("name") == null) {
+            // No name could be extracted: skip this page.
+            page.setSkip(true)
+        }
+        // The readme field is stored even when the page has been marked as skipped.
+        page.putField("readme", page.html.xpath("//div[@id='readme']/tidyText()"))
+    }
+
+    /** Returns the crawl settings used by this processor. */
+    override fun getSite(): Site = site
+
+    companion object {
+        /**
+         * Starts a five-thread crawl from https://github.com/code4craft.
+         *
+         * @param args command-line arguments (unused)
+         */
+        @JvmStatic fun main(args: Array<String>) {
+            // NOTE(review): this file also explicitly imports
+            // us.codecraft.webmagic.processor.example.GithubRepoPageProcessor. An explicit import
+            // appears to take precedence over this same-named class, so the call below may
+            // instantiate the imported processor instead - confirm, and drop the import if so.
+            Spider.create(GithubRepoPageProcessor()).addUrl("https://github.com/code4craft").thread(5).run()
+        }
+    }
+}