set selenium dep to separate package
parent
bd1384a513
commit
f1573b40a2
11
pom.xml
11
pom.xml
|
@ -8,10 +8,12 @@
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<artifactId>webmagic</artifactId>
|
<artifactId>webmagic</artifactId>
|
||||||
|
|
||||||
<modules>
|
<modules>
|
||||||
<module>webmagic-core</module>
|
<module>webmagic-core</module>
|
||||||
<module>webmagic-extension/</module>
|
<module>webmagic-extension/</module>
|
||||||
<module>webmagic-samples/</module>
|
<module>webmagic-samples/</module>
|
||||||
|
<module>webmagic-selenium/</module>
|
||||||
|
<module>webmagic-lucene/</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
|
@ -143,5 +145,4 @@
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -22,11 +22,6 @@
|
||||||
<artifactId>jedis</artifactId>
|
<artifactId>jedis</artifactId>
|
||||||
<version>2.0.0</version>
|
<version>2.0.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.seleniumhq.selenium</groupId>
|
|
||||||
<artifactId>selenium-java</artifactId>
|
|
||||||
<version>2.33.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>webmagic-core</artifactId>
|
<artifactId>webmagic-core</artifactId>
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
<assembly
|
|
||||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
|
||||||
<id>jar-with-dependencies</id>
|
|
||||||
<formats>
|
|
||||||
<format>jar</format>
|
|
||||||
</formats>
|
|
||||||
<includeBaseDirectory>false</includeBaseDirectory>
|
|
||||||
<fileSets>
|
|
||||||
<fileSet>
|
|
||||||
<directory>${project.basedir}/target/classes</directory>
|
|
||||||
<outputDirectory>/</outputDirectory>
|
|
||||||
</fileSet>
|
|
||||||
</fileSets>
|
|
||||||
<dependencySets>
|
|
||||||
<dependencySet>
|
|
||||||
<unpack>false</unpack>
|
|
||||||
</dependencySet>
|
|
||||||
</dependencySets>
|
|
||||||
</assembly>
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
webmagic-extension
|
||||||
|
-------
|
||||||
|
webmagic的扩展模块。包括注解格式定义爬虫、JSON、分布式等支持。
|
|
@ -0,0 +1,31 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<groupId>us.codecraft</groupId>
|
||||||
|
<artifactId>webmagic</artifactId>
|
||||||
|
<version>0.2.0</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>webmagic-selenium</artifactId>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.seleniumhq.selenium</groupId>
|
||||||
|
<artifactId>selenium-java</artifactId>
|
||||||
|
<version>2.33.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>us.codecraft</groupId>
|
||||||
|
<artifactId>webmagic-core</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.webmagic.samples.selenium;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.webmagic.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.webmagic.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
@ -6,7 +6,6 @@ import us.codecraft.webmagic.Spider;
|
||||||
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
|
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
|
||||||
import us.codecraft.webmagic.pipeline.FilePipeline;
|
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||||
import us.codecraft.webmagic.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.webmagic.scheduler.RedisScheduler;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 花瓣网抽取器。<br>
|
* 花瓣网抽取器。<br>
|
||||||
|
@ -39,7 +38,6 @@ public class HuabanProcessor implements PageProcessor {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
Spider.create(new HuabanProcessor()).thread(5)
|
Spider.create(new HuabanProcessor()).thread(5)
|
||||||
.scheduler(new RedisScheduler("localhost"))
|
|
||||||
.pipeline(new FilePipeline("/data/webmagic/test/"))
|
.pipeline(new FilePipeline("/data/webmagic/test/"))
|
||||||
.downloader(new SeleniumDownloader("/Users/yihua/Downloads/chromedriver"))
|
.downloader(new SeleniumDownloader("/Users/yihua/Downloads/chromedriver"))
|
||||||
.runAsync();
|
.runAsync();
|
Loading…
Reference in New Issue