diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
index 42dd079..1f6657c 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -28,6 +28,8 @@ public class Request {
private Object[] extra;
+ private double priority;
+
/**
* 构建一个request对象
* @param url 必须参数,待抓取的url
@@ -38,6 +40,15 @@ public class Request {
this.extra = extra;
}
+ /**
+ * Returns the priority value currently stored on this request.
+ *
+ * @return the value of the {@code priority} field
+ */
+ public double getPriority() {
+ return priority;
+ }
+
+ /**
+ * Stores the given priority on this request.
+ *
+ * @param priority the value to assign to the {@code priority} field
+ * @return this request, so that calls can be chained
+ */
+ public Request setPriority(double priority) {
+ this.priority = priority;
+ return this;
+ }
+
/**
* 获取预存的对象
* @return object[] 预存的对象数组
@@ -54,4 +65,20 @@ public class Request {
return url;
}
+ /**
+ * Compares this request with another object by URL.
+ * <p>
+ * Two requests are equal when they have exactly the same runtime class and their
+ * {@code url} values are equal; {@code extra} and {@code priority} are not compared.
+ *
+ * @param o the object to compare against, may be {@code null}
+ * @return {@code true} if {@code o} is a request of the same class with an equal URL
+ */
+ @Override
+ public boolean equals(Object o) {
+     if (this == o) {
+         return true;
+     }
+     if (o == null || getClass() != o.getClass()) {
+         return false;
+     }
+     Request other = (Request) o;
+     return url.equals(other.url);
+ }
+
+ /**
+ * Hashes this request by its URL only, the same field that {@link #equals(Object)} compares.
+ * <p>
+ * NOTE(review): throws {@link NullPointerException} if {@code url} is null; presumably the
+ * constructor guarantees a non-null URL -- confirm.
+ *
+ * @return the hash code of {@code url}
+ */
+ @Override
+ public int hashCode() {
+ return url.hashCode();
+ }
}
diff --git a/webmagic-plugin/pom.xml b/webmagic-plugin/pom.xml
index 2225722..54c69ec 100644
--- a/webmagic-plugin/pom.xml
+++ b/webmagic-plugin/pom.xml
@@ -12,6 +12,7 @@
<module>webmagic-misc</module>
<module>webmagic-selenium</module>
+ <module>webmagic-lucene</module>
webmagic-plugin
diff --git a/webmagic-plugin/webmagic-lucene/pom.xml b/webmagic-plugin/webmagic-lucene/pom.xml
new file mode 100644
index 0000000..b072472
--- /dev/null
+++ b/webmagic-plugin/webmagic-lucene/pom.xml
@@ -0,0 +1,28 @@
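+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>webmagic-plugin</artifactId>
+        <groupId>us.codecraft</groupId>
+        <version>0.1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>webmagic-lucene</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <version>4.4.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-queryparser</artifactId>
+            <version>4.4.0</version>
+        </dependency>
+    </dependencies>
+
+
+</project>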
\ No newline at end of file
diff --git a/webmagic-plugin/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java b/webmagic-plugin/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java
new file mode 100644
index 0000000..2e7191c
--- /dev/null
+++ b/webmagic-plugin/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java
@@ -0,0 +1,64 @@
+package us.codecraft.webmagic.pipeline;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+import us.codecraft.webmagic.ResultItems;
+import us.codecraft.webmagic.Task;
+
+import java.io.File;
+
+/**
+ * Work-in-progress Lucene pipeline.
+ * <p>
+ * {@link #process(ResultItems, Task)} does nothing yet; {@link #main(String[])} is a
+ * standalone manual check that opens an on-disk index and prints the hits of a fixed query.
+ *
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-5
+ * Time: 2:11 PM
+ */
+public class LucenePipeline implements Pipeline {
+
+    /**
+     * Placeholder implementation: currently performs no work.
+     *
+     * @param resultItems the results handed to this pipeline (unused for now)
+     * @param task        the task handed to this pipeline (unused for now)
+     */
+    @Override
+    public void process(ResultItems resultItems, Task task) {
+        // TODO: not implemented yet -- the given results are currently discarded.
+    }
+
+    /**
+     * Ad-hoc manual check against the on-disk index at a hard-coded path.
+     * <p>
+     * Opens an {@link IndexWriter} on the directory and closes it again without adding
+     * documents, then searches the field {@code "fieldname"} for a fixed query and prints
+     * up to 1000 matching documents to standard output. The reader and the directory are
+     * closed even when searching fails.
+     *
+     * @param args ignored
+     * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
+     */
+    public static void main(String[] args) throws Exception {
+        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
+        Directory directory = FSDirectory.open(new File("/data/webmagic/www.guoxue123.cn/"));
+        try {
+            // No documents are added here; the writer is only opened and closed again.
+            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
+            IndexWriter iwriter = new IndexWriter(directory, config);
+            iwriter.close();
+
+            // Now search the index.
+            DirectoryReader ireader = DirectoryReader.open(directory);
+            try {
+                IndexSearcher isearcher = new IndexSearcher(ireader);
+                QueryParser parser = new QueryParser(Version.LUCENE_44, "fieldname", analyzer);
+                Query query = parser.parse("经典");
+                ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
+                for (ScoreDoc hit : hits) {
+                    Document hitDoc = isearcher.doc(hit.doc);
+                    System.out.println(hitDoc);
+                }
+            } finally {
+                ireader.close();
+            }
+        } finally {
+            directory.close();
+        }
+    }
+}