Add getUrl() extension point to the duplicate removers #118

master
yihua.huang 2014-05-02 23:23:23 +08:00
parent ec1c2e8cbc
commit 01aec7e1ab
2 changed files with 11 additions and 3 deletions

View File

@@ -46,14 +46,18 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
@Override @Override
public boolean isDuplicate(Request request, Task task) { public boolean isDuplicate(Request request, Task task) {
boolean isDuplicate = bloomFilter.mightContain(request.getUrl()); boolean isDuplicate = bloomFilter.mightContain(getUrl(request));
if (!isDuplicate) { if (!isDuplicate) {
bloomFilter.put(request.getUrl()); bloomFilter.put(getUrl(request));
counter.incrementAndGet(); counter.incrementAndGet();
} }
return isDuplicate; return isDuplicate;
} }
protected String getUrl(Request request) {
return request.getUrl();
}
@Override @Override
public void resetDuplicateCheck(Task task) { public void resetDuplicateCheck(Task task) {
rebuildBloomFilter(); rebuildBloomFilter();

View File

@@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
@Override @Override
public boolean isDuplicate(Request request, Task task) { public boolean isDuplicate(Request request, Task task) {
return !urls.add(request.getUrl()); return !urls.add(getUrl(request));
}
protected String getUrl(Request request) {
return request.getUrl();
} }
@Override @Override