extension point of geturl #118
parent
ec1c2e8cbc
commit
01aec7e1ab
|
@ -46,14 +46,18 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
|
|||
|
||||
/**
 * Reports whether the request's de-duplication key has already been seen by the bloom filter.
 * When the key has not been seen, it is added to the filter and the counter is incremented.
 * The {@code task} argument is not used in this method body.
 *
 * NOTE(review): this diff view appears to show each changed statement twice — first the
 * version being replaced (using request.getUrl() directly), then its replacement (routing
 * through getUrl(request)). Only one of each pair exists in the actual source file.
 *
 * @param request the request whose key is checked
 * @param task    the owning task (unused here)
 * @return the value reported by bloomFilter.mightContain for the key
 */
@Override
|
||||
public boolean isDuplicate(Request request, Task task) {
|
||||
// Earlier form: the key was always the raw request URL.
boolean isDuplicate = bloomFilter.mightContain(request.getUrl());
|
||||
// Current form: the key comes from the overridable getUrl(request) hook.
// NOTE(review): if bloomFilter is a Guava BloomFilter, mightContain can return false positives — confirm.
boolean isDuplicate = bloomFilter.mightContain(getUrl(request));
|
||||
if (!isDuplicate) {
|
||||
// Earlier form of the insert.
bloomFilter.put(request.getUrl());
|
||||
// Current form: insert the same key that was tested above.
bloomFilter.put(getUrl(request));
|
||||
// Counter is bumped only when a new key is recorded.
counter.incrementAndGet();
|
||||
}
|
||||
return isDuplicate;
|
||||
}
|
||||
|
||||
/**
 * Returns the string used as the de-duplication key for a request.
 * This implementation returns {@code request.getUrl()} unchanged; the method is
 * {@code protected}, so a subclass can override it to supply a different key.
 *
 * @param request the request to derive the key from
 * @return the request's URL
 */
protected String getUrl(Request request) {
|
||||
return request.getUrl();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resetDuplicateCheck(Task task) {
|
||||
rebuildBloomFilter();
|
||||
|
|
|
@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
|
|||
|
||||
/**
 * Adds the request's de-duplication key to {@code urls} and returns the negation of
 * what {@code urls.add} reports. The {@code task} argument is not used in this method body.
 *
 * NOTE(review): this diff view appears to show the return statement twice — the version
 * being replaced followed by its replacement. Only one exists in the actual source file.
 * NOTE(review): assuming urls is a java.util.Set, add() returns false when the element is
 * already present, so this returns true for a previously seen key — confirm the field type.
 *
 * @param request the request whose key is recorded
 * @param task    the owning task (unused here)
 * @return the negated result of urls.add for the key
 */
@Override
|
||||
public boolean isDuplicate(Request request, Task task) {
|
||||
// Earlier form: keyed on the raw request URL.
return !urls.add(request.getUrl());
|
||||
// Current form: keyed on the overridable getUrl(request) hook.
return !urls.add(getUrl(request));
|
||||
}
|
||||
|
||||
/**
 * Returns the string used as the de-duplication key for a request.
 * This implementation returns {@code request.getUrl()} unchanged; the method is
 * {@code protected}, so a subclass can override it to supply a different key.
 *
 * @param request the request to derive the key from
 * @return the request's URL
 */
protected String getUrl(Request request) {
|
||||
return request.getUrl();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue