fix a lucene bug
parent
9ef6de01e4
commit
e5cf2882b0
|
@ -24,7 +24,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>us.codecraft</groupId>
|
<groupId>us.codecraft</groupId>
|
||||||
<artifactId>webmagic-core</artifactId>
|
<artifactId>webmagic-extension</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -33,16 +33,14 @@ public class LucenePipeline implements Pipeline {
|
||||||
|
|
||||||
private Directory directory;
|
private Directory directory;
|
||||||
|
|
||||||
private IndexWriter indexWriter;
|
|
||||||
|
|
||||||
private Analyzer analyzer;
|
private Analyzer analyzer;
|
||||||
|
|
||||||
|
private IndexWriterConfig config;
|
||||||
|
|
||||||
private void init() throws IOException {
|
private void init() throws IOException {
|
||||||
analyzer = new StandardAnalyzer(Version.LUCENE_44);
|
analyzer = new StandardAnalyzer(Version.LUCENE_44);
|
||||||
directory = new RAMDirectory();
|
directory = new RAMDirectory();
|
||||||
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
|
config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
|
||||||
indexWriter = new IndexWriter(directory, config);
|
|
||||||
indexWriter.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public LucenePipeline() {
|
public LucenePipeline() {
|
||||||
|
@ -67,7 +65,6 @@ public class LucenePipeline implements Pipeline {
|
||||||
documents.add(hitDoc);
|
documents.add(hitDoc);
|
||||||
}
|
}
|
||||||
ireader.close();
|
ireader.close();
|
||||||
directory.close();
|
|
||||||
return documents;
|
return documents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,7 +82,9 @@ public class LucenePipeline implements Pipeline {
|
||||||
doc.add(new Field(objectEntry.getKey(), objectEntry.getValue().toString(), TextField.TYPE_STORED));
|
doc.add(new Field(objectEntry.getKey(), objectEntry.getValue().toString(), TextField.TYPE_STORED));
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
IndexWriter indexWriter = new IndexWriter(directory, config);
|
||||||
indexWriter.addDocument(doc);
|
indexWriter.addDocument(doc);
|
||||||
|
indexWriter.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
package us.codecraft.webmagic.lucene;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.model.ExtractBy;
|
||||||
|
import us.codecraft.webmagic.model.OOSpider;
|
||||||
|
import us.codecraft.webmagic.model.TargetUrl;
|
||||||
|
import us.codecraft.webmagic.pipeline.LucenePipeline;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com <br>
|
||||||
|
* @date: 13-8-2 <br>
|
||||||
|
* Time: 上午7:52 <br>
|
||||||
|
*/
|
||||||
|
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
|
||||||
|
public class OschinaBlog {
|
||||||
|
|
||||||
|
@ExtractBy("//title")
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
@ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
|
||||||
|
private String content;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "OschinaBlog{" +
|
||||||
|
"title='" + title + '\'' +
|
||||||
|
", content='" + content + '\'' +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
LucenePipeline pipeline = new LucenePipeline();
|
||||||
|
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(pipeline).runAsync();
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
List<Document> search = pipeline.search("title", "webmagic");
|
||||||
|
System.out.println(search);
|
||||||
|
Thread.sleep(3000);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (ParseException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue