diff --git a/lib/assertj-core-1.5.0.jar b/lib/assertj-core-1.5.0.jar new file mode 100644 index 0000000..071ccf4 Binary files /dev/null and b/lib/assertj-core-1.5.0.jar differ diff --git a/lib/fastjson-1.1.35.jar b/lib/fastjson-1.1.37.jar similarity index 61% rename from lib/fastjson-1.1.35.jar rename to lib/fastjson-1.1.37.jar index e2876e2..1b13b82 100644 Binary files a/lib/fastjson-1.1.35.jar and b/lib/fastjson-1.1.37.jar differ diff --git a/lib/hamcrest-core-1.3.jar b/lib/hamcrest-core-1.3.jar new file mode 100644 index 0000000..9d5fe16 Binary files /dev/null and b/lib/hamcrest-core-1.3.jar differ diff --git a/lib/htmlcleaner-2.5.jar b/lib/htmlcleaner-2.5.jar deleted file mode 100644 index 4b65546..0000000 Binary files a/lib/htmlcleaner-2.5.jar and /dev/null differ diff --git a/lib/httpclient-4.3.1.jar b/lib/httpclient-4.3.3.jar similarity index 70% rename from lib/httpclient-4.3.1.jar rename to lib/httpclient-4.3.3.jar index cdd1e38..6c46bff 100644 Binary files a/lib/httpclient-4.3.1.jar and b/lib/httpclient-4.3.3.jar differ diff --git a/lib/httpcore-4.3.jar b/lib/httpcore-4.3.2.jar similarity index 79% rename from lib/httpcore-4.3.jar rename to lib/httpcore-4.3.2.jar index ddfe6da..813ec23 100644 Binary files a/lib/httpcore-4.3.jar and b/lib/httpcore-4.3.2.jar differ diff --git a/lib/jdom2-2.0.4.jar b/lib/jdom2-2.0.4.jar deleted file mode 100644 index 19cafb8..0000000 Binary files a/lib/jdom2-2.0.4.jar and /dev/null differ diff --git a/lib/junit-4.11.jar b/lib/junit-4.11.jar new file mode 100644 index 0000000..aaf7444 Binary files /dev/null and b/lib/junit-4.11.jar differ diff --git a/lib/junit-4.7.jar b/lib/junit-4.7.jar deleted file mode 100644 index 700ad69..0000000 Binary files a/lib/junit-4.7.jar and /dev/null differ diff --git a/lib/slf4j-api-1.7.6.jar b/lib/slf4j-api-1.7.6.jar new file mode 100644 index 0000000..19aaf37 Binary files /dev/null and b/lib/slf4j-api-1.7.6.jar differ diff --git a/lib/slf4j-log4j12-1.7.6.jar b/lib/slf4j-log4j12-1.7.6.jar new file mode 100644 index 0000000..d1cc245 Binary files /dev/null and b/lib/slf4j-log4j12-1.7.6.jar differ diff --git a/lib/webmagic-core-0.4.2.jar b/lib/webmagic-core-0.4.2.jar deleted file mode 100644 index 0f07572..0000000 Binary files a/lib/webmagic-core-0.4.2.jar and /dev/null differ diff --git a/lib/webmagic-core-0.5.2.jar b/lib/webmagic-core-0.5.2.jar new file mode 100644 index 0000000..21f4103 Binary files /dev/null and b/lib/webmagic-core-0.5.2.jar differ diff --git a/lib/webmagic-extension-0.4.2.jar b/lib/webmagic-extension-0.4.2.jar deleted file mode 100644 index a19fa63..0000000 Binary files a/lib/webmagic-extension-0.4.2.jar and /dev/null differ diff --git a/lib/webmagic-extension-0.5.2.jar b/lib/webmagic-extension-0.5.2.jar new file mode 100644 index 0000000..30524b4 Binary files /dev/null and b/lib/webmagic-extension-0.5.2.jar differ diff --git a/lib/xsoup-0.1.0.jar b/lib/xsoup-0.1.0.jar deleted file mode 100644 index 926665c..0000000 Binary files a/lib/xsoup-0.1.0.jar and /dev/null differ diff --git a/lib/xsoup-0.2.4.jar b/lib/xsoup-0.2.4.jar new file mode 100644 index 0000000..c486bfe Binary files /dev/null and b/lib/xsoup-0.2.4.jar differ diff --git a/make.sh b/make.sh index 0a72237..0444c18 100644 --- a/make.sh +++ b/make.sh @@ -1,3 +1,3 @@ #!/bin/sh -mvn clean package +mvn clean dependency:copy-dependencies -DoutputDirectory=target/lib rsync -avz --delete ./webmagic-samples/target/lib/ ./lib/ diff --git a/webmagic-avalon/.classpath b/webmagic-avalon/.classpath new file mode 100644 index 0000000..86ca9e7 --- /dev/null +++ b/webmagic-avalon/.classpath @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-avalon/.project b/webmagic-avalon/.project new file mode 100644 index 0000000..4e8699d --- /dev/null +++ b/webmagic-avalon/.project @@ -0,0 +1,19 @@ + + + webmagic-avalon + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-scripts + webmagic-core + webmagic-extension + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-avalon/pom.xml b/webmagic-avalon/pom.xml index 8ad4df5..7a0b884 100644 --- a/webmagic-avalon/pom.xml +++ b/webmagic-avalon/pom.xml @@ -134,4 +134,4 @@ - \ No newline at end of file + diff --git a/webmagic-core/.classpath b/webmagic-core/.classpath new file mode 100644 index 0000000..96b7fe7 --- /dev/null +++ b/webmagic-core/.classpath @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-core/.project b/webmagic-core/.project new file mode 100644 index 0000000..be61845 --- /dev/null +++ b/webmagic-core/.project @@ -0,0 +1,15 @@ + + + webmagic-core + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-extension/.classpath b/webmagic-extension/.classpath new file mode 100644 index 0000000..e6eaa7d --- /dev/null +++ b/webmagic-extension/.classpath @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-extension/.project b/webmagic-extension/.project new file mode 100644 index 0000000..67898ec --- /dev/null +++ b/webmagic-extension/.project @@ -0,0 +1,17 @@ + + + webmagic-extension + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-core + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-lucene/.classpath b/webmagic-lucene/.classpath new file mode 100644 index 0000000..3807ba9 --- /dev/null +++ b/webmagic-lucene/.classpath @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-lucene/.project b/webmagic-lucene/.project new file mode 100644 index 0000000..8cd73c3 --- /dev/null +++ b/webmagic-lucene/.project @@ -0,0 +1,18 @@ + + + webmagic-lucene + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-extension + webmagic-core + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-samples/.classpath b/webmagic-samples/.classpath new file mode 100644 index 0000000..a641715 --- /dev/null +++ b/webmagic-samples/.classpath @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-samples/.project b/webmagic-samples/.project new file mode 100644 index 0000000..cc31865 --- /dev/null +++ b/webmagic-samples/.project @@ -0,0 +1,18 @@ + + + webmagic-samples + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-core + webmagic-extension + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-saxon/.classpath b/webmagic-saxon/.classpath new file mode 100644 index 0000000..60669ae --- /dev/null +++ b/webmagic-saxon/.classpath @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-saxon/.project b/webmagic-saxon/.project new file mode 100644 index 0000000..716f9c8 --- /dev/null +++ b/webmagic-saxon/.project @@ -0,0 +1,17 @@ + + + webmagic-saxon + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-core + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml index bc6c0b1..f7ad84c 100644 --- a/webmagic-saxon/pom.xml +++ b/webmagic-saxon/pom.xml @@ -42,4 +42,4 @@ - \ No newline at end of file + diff --git a/webmagic-scripts/.classpath b/webmagic-scripts/.classpath new file mode 100644 index 0000000..ccf3733 --- /dev/null +++ b/webmagic-scripts/.classpath @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-scripts/.project b/webmagic-scripts/.project new file mode 100644 index 0000000..18da7c6 --- /dev/null +++ b/webmagic-scripts/.project @@ -0,0 +1,18 @@ + + + webmagic-scripts + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-core + webmagic-extension + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-selenium/.classpath b/webmagic-selenium/.classpath new file mode 100644 index 0000000..6c41d8c --- /dev/null +++ b/webmagic-selenium/.classpath @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/webmagic-selenium/.project b/webmagic-selenium/.project new file mode 100644 index 0000000..446124e --- /dev/null +++ b/webmagic-selenium/.project @@ -0,0 +1,17 @@ + + + webmagic-selenium + A crawler framework. It covers the whole lifecycle of crawler: downloading, url management, content + extraction and persistent. It can simply the development of a specific crawler. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. + + webmagic-core + + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.jdt.core.javanature + + \ No newline at end of file diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index 5de3ee7..555a97f 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -45,4 +45,4 @@ - \ No newline at end of file +