invite kotlin experimental
parent
3c653d941a
commit
818a2b2408
|
@ -9,6 +9,9 @@
|
|||
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-scripts</artifactId>
|
||||
<properties>
|
||||
<kotlin.version>1.1.2-2</kotlin.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
|
@ -16,6 +19,12 @@
|
|||
<artifactId>jruby</artifactId>
|
||||
<version>1.7.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jetbrains.kotlin</groupId>
|
||||
<artifactId>kotlin-stdlib</artifactId>
|
||||
<version>${kotlin.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.groovy</groupId>
|
||||
<artifactId>groovy-all</artifactId>
|
||||
|
@ -48,6 +57,7 @@
|
|||
</dependencies>
|
||||
|
||||
<build>
|
||||
<sourceDirectory>${project.basedir}/src/main/kotlin,${project.basedir}/src/main/groovy,${project.basedir}/src/main/java</sourceDirectory>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
import us.codecraft.webmagic.Page
|
||||
import us.codecraft.webmagic.Site
|
||||
import us.codecraft.webmagic.Spider
|
||||
import us.codecraft.webmagic.processor.PageProcessor
|
||||
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
|
||||
|
||||
/**
 * Kotlin example of a WebMagic [PageProcessor] that crawls GitHub.
 *
 * For every downloaded page it queues further GitHub links and stores the
 * `author`, `name` and `readme` fields in the page's result items. Pages on
 * which no repository name can be extracted are marked as skipped.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/5/31
 * Time: 11:33 PM
 */
class GithubRepoPageProcessor : PageProcessor {

    // Crawl settings shared by every request issued by this processor.
    private val site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000)

    /**
     * Queues follow-up links found on [page] and extracts the result fields.
     *
     * @param page the downloaded page handed over by the spider
     */
    override fun process(page: Page) {
        val links = page.html.links()

        // Repository pages: https://github.com/<owner>/<repo>
        page.addTargetRequests(links.regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all())
        // Profile pages: https://github.com/<owner>. The `+` quantifier is required;
        // without it only a single character after the host is captured and the
        // queued URL is truncated (e.g. "https://github.com/c").
        page.addTargetRequests(links.regex("(https://github\\.com/[\\w\\-]+)").all())

        // Account names may contain hyphens, so use the same character class as the
        // link patterns above instead of a bare \w+.
        page.putField("author", page.url.regex("https://github\\.com/([\\w\\-]+)/.*").toString())
        page.putField("name", page.html.xpath("//h1[@class='public']/strong/a/text()").toString())

        // Read the value back from the result items rather than a local so that the
        // null check is performed on exactly what was stored.
        if (page.resultItems.get<Any>("name") == null) {
            // No repository name on this page: skip it.
            page.setSkip(true)
        }

        page.putField("readme", page.html.xpath("//div[@id='readme']/tidyText()"))
    }

    /** Returns the crawl settings used by this processor. */
    override fun getSite(): Site = site

    companion object {
        /**
         * Starts a five-thread crawl seeded with a single GitHub profile URL.
         *
         * NOTE(review): this file also imports a class with the same simple name
         * (`us.codecraft.webmagic.processor.example.GithubRepoPageProcessor`);
         * confirm which of the two the constructor call below resolves to.
         */
        @JvmStatic
        fun main(args: Array<String>) {
            Spider.create(GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft")
                .thread(5)
                .run()
        }
    }
}
|
Loading…
Reference in New Issue