set selenium dep to separate package
parent
bd1384a513
commit
f1573b40a2
11
pom.xml
11
pom.xml
|
@ -8,10 +8,12 @@
|
|||
<packaging>pom</packaging>
|
||||
<artifactId>webmagic</artifactId>
|
||||
|
||||
<modules>
|
||||
<module>webmagic-core</module>
|
||||
<module>webmagic-extension/</module>
|
||||
<module>webmagic-samples/</module>
|
||||
<modules>
|
||||
<module>webmagic-core</module>
|
||||
<module>webmagic-extension/</module>
|
||||
<module>webmagic-samples/</module>
|
||||
<module>webmagic-selenium/</module>
|
||||
<module>webmagic-lucene/</module>
|
||||
</modules>
|
||||
|
||||
<dependencyManagement>
|
||||
|
@ -143,5 +145,4 @@
|
|||
</build>
|
||||
|
||||
|
||||
|
||||
</project>
|
||||
|
|
|
@ -22,11 +22,6 @@
|
|||
<artifactId>jedis</artifactId>
|
||||
<version>2.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>2.33.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-core</artifactId>
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
||||
<id>jar-with-dependencies</id>
|
||||
<formats>
|
||||
<format>jar</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>${project.basedir}/target/classes</directory>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<unpack>false</unpack>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -0,0 +1,3 @@
|
|||
webmagic-extension
|
||||
-------
|
||||
webmagic的扩展模块。包括注解格式定义爬虫、JSON、分布式等支持。
|
|
@ -0,0 +1,31 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic</artifactId>
|
||||
<version>0.2.0</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>webmagic-selenium</artifactId>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>2.33.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
|
@ -1,4 +1,4 @@
|
|||
package us.codecraft.webmagic.samples.selenium;
|
||||
package us.codecraft.webmagic.samples;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
|
@ -6,7 +6,6 @@ import us.codecraft.webmagic.Spider;
|
|||
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
|
||||
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.scheduler.RedisScheduler;
|
||||
|
||||
/**
|
||||
* 花瓣网抽取器。<br>
|
||||
|
@ -39,7 +38,6 @@ public class HuabanProcessor implements PageProcessor {
|
|||
|
||||
public static void main(String[] args) {
|
||||
Spider.create(new HuabanProcessor()).thread(5)
|
||||
.scheduler(new RedisScheduler("localhost"))
|
||||
.pipeline(new FilePipeline("/data/webmagic/test/"))
|
||||
.downloader(new SeleniumDownloader("/Users/yihua/Downloads/chromedriver"))
|
||||
.runAsync();
|
Loading…
Reference in New Issue