split modules
parent
3c3f001186
commit
6dc88fa111
|
@ -0,0 +1,105 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<groupId>us.codecraft</groupId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>webmagic-core</artifactId>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
<version>4.2.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.7</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>13.0.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-lang3</artifactId>
|
||||||
|
<version>3.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
<version>1.2.17</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-collections</groupId>
|
||||||
|
<artifactId>commons-collections</artifactId>
|
||||||
|
<version>3.2.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.sourceforge.htmlcleaner</groupId>
|
||||||
|
<artifactId>htmlcleaner</artifactId>
|
||||||
|
<version>2.4</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
<version>1.3.2</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<encoding>UTF-8</encoding>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-source-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>attach-sources</id>
|
||||||
|
<goals>
|
||||||
|
<goal>jar</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-javadoc-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>attach-javadocs</id>
|
||||||
|
<goals>
|
||||||
|
<goal>jar</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-release-plugin</artifactId>
|
||||||
|
<version>2.0-beta-7</version>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
|
||||||
|
</project>
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import us.codecraft.spider.selector.Selectable;
|
import us.codecraft.webmagic.selector.Selectable;
|
||||||
import us.codecraft.spider.utils.UrlUtils;
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,8 +1,4 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
|
@ -1,17 +1,14 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import us.codecraft.spider.downloader.Downloader;
|
import us.codecraft.webmagic.downloader.Downloader;
|
||||||
import us.codecraft.spider.downloader.HttpClientDownloader;
|
import us.codecraft.webmagic.downloader.HttpClientDownloader;
|
||||||
import us.codecraft.spider.pipeline.ConsolePipeline;
|
import us.codecraft.webmagic.pipeline.ConsolePipeline;
|
||||||
import us.codecraft.spider.pipeline.Pipeline;
|
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
import us.codecraft.spider.schedular.QueueSchedular;
|
import us.codecraft.webmagic.schedular.QueueSchedular;
|
||||||
import us.codecraft.spider.schedular.Schedular;
|
import us.codecraft.webmagic.schedular.Schedular;
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,16 +1,16 @@
|
||||||
package us.codecraft.spider.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.http.HttpResponse;
|
import org.apache.http.HttpResponse;
|
||||||
import org.apache.http.client.HttpClient;
|
import org.apache.http.client.HttpClient;
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.selector.Html;
|
import us.codecraft.webmagic.selector.Html;
|
||||||
import us.codecraft.spider.selector.PlainText;
|
import us.codecraft.webmagic.selector.PlainText;
|
||||||
import us.codecraft.spider.utils.UrlUtils;
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.downloader;
|
package us.codecraft.webmagic.downloader;
|
||||||
|
|
||||||
import org.apache.http.HttpVersion;
|
import org.apache.http.HttpVersion;
|
||||||
import org.apache.http.client.HttpClient;
|
import org.apache.http.client.HttpClient;
|
||||||
|
@ -10,7 +10,7 @@ import org.apache.http.conn.scheme.SchemeRegistry;
|
||||||
import org.apache.http.impl.client.DefaultHttpClient;
|
import org.apache.http.impl.client.DefaultHttpClient;
|
||||||
import org.apache.http.impl.conn.PoolingClientConnectionManager;
|
import org.apache.http.impl.conn.PoolingClientConnectionManager;
|
||||||
import org.apache.http.params.*;
|
import org.apache.http.params.*;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.pipeline;
|
package us.codecraft.webmagic.pipeline;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.selector.Selectable;
|
import us.codecraft.webmagic.selector.Selectable;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
package us.codecraft.spider.pipeline;
|
package us.codecraft.webmagic.pipeline;
|
||||||
|
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.selector.Selectable;
|
import us.codecraft.webmagic.selector.Selectable;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileWriter;
|
import java.io.FileWriter;
|
||||||
|
@ -19,7 +19,7 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class FilePipeline implements Pipeline {
|
public class FilePipeline implements Pipeline {
|
||||||
|
|
||||||
private String path = "/data/temp/spider/";
|
private String path = "/data/temp/webmagic/";
|
||||||
|
|
||||||
public FilePipeline(){
|
public FilePipeline(){
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package us.codecraft.spider.pipeline;
|
package us.codecraft.webmagic.pipeline;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,7 +1,7 @@
|
||||||
package us.codecraft.spider.processor;
|
package us.codecraft.webmagic.processor;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.processor;
|
package us.codecraft.webmagic.processor;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.utils.UrlUtils;
|
import us.codecraft.webmagic.utils.UrlUtils;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package us.codecraft.spider.schedular;
|
package us.codecraft.webmagic.schedular;
|
||||||
|
|
||||||
import org.apache.commons.lang3.math.NumberUtils;
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.schedular;
|
package us.codecraft.webmagic.schedular;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import us.codecraft.spider.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
|
@ -1,7 +1,7 @@
|
||||||
package us.codecraft.spider.schedular;
|
package us.codecraft.webmagic.schedular;
|
||||||
|
|
||||||
import us.codecraft.spider.Request;
|
import us.codecraft.webmagic.Request;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,6 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.htmlcleaner.HtmlCleaner;
|
import org.htmlcleaner.HtmlCleaner;
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.htmlcleaner.*;
|
import org.htmlcleaner.*;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.utils;
|
package us.codecraft.webmagic.utils;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.spider.selector.Html;
|
import us.codecraft.webmagic.selector.Html;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,11 +1,11 @@
|
||||||
package us.codecraft.spider;
|
package us.codecraft.webmagic;
|
||||||
|
|
||||||
import org.junit.Ignore;
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import us.codecraft.spider.pipeline.FilePipeline;
|
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||||
import us.codecraft.spider.processor.SimplePageProcessor;
|
import us.codecraft.webmagic.processor.SimplePageProcessor;
|
||||||
import us.codecraft.spider.samples.HuxiuProcessor;
|
import us.codecraft.webmagic.samples.HuxiuProcessor;
|
||||||
import us.codecraft.spider.schedular.FileCacheQueueSchedular;
|
import us.codecraft.webmagic.schedular.FileCacheQueueSchedular;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
||||||
|
@ -24,12 +24,12 @@ public class SpiderTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGlobalSpider(){
|
public void testGlobalSpider(){
|
||||||
// PageProcessor pageProcessor = new MeicanProcessor();
|
// PageProcessor pageProcessor = new MeicanProcessor();
|
||||||
// Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor.getSite(),"/data/temp/spider/cache/")).
|
// Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor.getSite(),"/data/temp/webmagic/cache/")).
|
||||||
// processor(pageProcessor).run();
|
// processor(pageProcessor).run();
|
||||||
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
|
||||||
pageProcessor2.getSite().setEncoding("GBK");
|
pageProcessor2.getSite().setEncoding("GBK");
|
||||||
System.out.println(pageProcessor2.getSite().getEncoding());
|
System.out.println(pageProcessor2.getSite().getEncoding());
|
||||||
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/spider/cache/")).
|
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")).
|
||||||
processor(pageProcessor2).run();
|
processor(pageProcessor2).run();
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,8 +1,8 @@
|
||||||
package us.codecraft.spider.samples;
|
package us.codecraft.webmagic.samples;
|
||||||
|
|
||||||
import us.codecraft.spider.Site;
|
import us.codecraft.webmagic.Site;
|
||||||
import us.codecraft.spider.Page;
|
import us.codecraft.webmagic.Page;
|
||||||
import us.codecraft.spider.processor.PageProcessor;
|
import us.codecraft.webmagic.processor.PageProcessor;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.htmlcleaner.CleanerProperties;
|
import org.htmlcleaner.CleanerProperties;
|
||||||
import org.htmlcleaner.HtmlCleaner;
|
import org.htmlcleaner.HtmlCleaner;
|
||||||
|
@ -6,7 +6,6 @@ import org.htmlcleaner.TagNode;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
|
@ -1,12 +1,8 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.htmlcleaner.CleanerProperties;
|
|
||||||
import org.htmlcleaner.HtmlCleaner;
|
|
||||||
import org.htmlcleaner.TagNode;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: cairne
|
* User: cairne
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.selector;
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
|
@ -1,4 +1,4 @@
|
||||||
package us.codecraft.spider.utils;
|
package us.codecraft.webmagic.utils;
|
||||||
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
|
@ -0,0 +1,26 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||||
|
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||||
|
|
||||||
|
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
|
||||||
|
<layout class="org.apache.log4j.PatternLayout">
|
||||||
|
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||||
|
</layout>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<logger name="org.springframework" additivity="false">
|
||||||
|
<level value="warn" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
<logger name="net.sf.ehcache" additivity="false">
|
||||||
|
<level value="warn" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
<root>
|
||||||
|
<level value="info" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</root>
|
||||||
|
|
||||||
|
</log4j:configuration>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,31 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||||
|
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||||
|
|
||||||
|
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
|
||||||
|
<layout class="org.apache.log4j.PatternLayout">
|
||||||
|
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||||
|
</layout>
|
||||||
|
</appender>
|
||||||
|
|
||||||
|
<logger name="org.springframework" additivity="false">
|
||||||
|
<level value="warn" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
<logger name="org.apache" additivity="false">
|
||||||
|
<level value="warn" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
<logger name="net.sf.ehcache" additivity="false">
|
||||||
|
<level value="warn" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</logger>
|
||||||
|
|
||||||
|
<root>
|
||||||
|
<level value="debug" />
|
||||||
|
<appender-ref ref="stdout" />
|
||||||
|
</root>
|
||||||
|
|
||||||
|
</log4j:configuration>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,67 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<groupId>us.codecraft</groupId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>webmagic-plugin</artifactId>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>us.codecraft</groupId>
|
||||||
|
<artifactId>webmagic-core</artifactId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.7</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<encoding>UTF-8</encoding>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-source-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>attach-sources</id>
|
||||||
|
<goals>
|
||||||
|
<goal>jar</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-javadoc-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>attach-javadocs</id>
|
||||||
|
<goals>
|
||||||
|
<goal>jar</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-release-plugin</artifactId>
|
||||||
|
<version>2.0-beta-7</version>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
|
||||||
|
</project>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue