test and so on

master
yihua.huang 2014-05-02 23:19:11 +08:00
parent 4f22f1210e
commit ec1c2e8cbc
2 changed files with 59 additions and 1 deletions

View File

@ -23,9 +23,14 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
private AtomicInteger counter;
/**
 * Convenience constructor: delegates to the two-argument constructor with a
 * built-in false positive probability.
 *
 * @param expectedInsertions passed through unchanged to the two-argument constructor
 */
public BloomFilterDuplicateRemover(int expectedInsertions) {
// NOTE(review): the two this(...) calls below look like the removed (0.03) and added (0.01)
// sides of a diff hunk whose +/- markers were stripped; compilable source can keep only one
// explicit constructor call, presumably the 0.01 variant -- confirm against the repository.
this(expectedInsertions, 0.03);
this(expectedInsertions, 0.01);
}
/**
*
* @param expectedInsertions the number of expected insertions to the constructed Bloom filter
* @param fpp the desired false positive probability (must be positive and less than 1.0)
*/
public BloomFilterDuplicateRemover(int expectedInsertions, double fpp) {
this.expectedInsertions = expectedInsertions;
this.fpp = fpp;

View File

@ -1,8 +1,11 @@
package us.codecraft.webmagic.scheduler;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
import static org.assertj.core.api.Assertions.assertThat;
@ -24,4 +27,54 @@ public class BloomFilterDuplicateRemoverTest {
assertThat(isDuplicate).isTrue();
}
/**
 * Manual benchmark (ignored by default because it is slow): pushes the same
 * number of distinct requests through a {@link BloomFilterDuplicateRemover}
 * and a {@link HashSetDuplicateRemover} and prints the elapsed time and the
 * growth in used heap for each.
 *
 * <p>Used heap is computed as {@code totalMemory() - freeMemory()} so that the
 * figure stays meaningful when the JVM grows the heap during the run, and a
 * garbage collection is requested before both baselines so the two runs start
 * from comparable conditions. The numbers are still only indicative:
 * {@code System.gc()} is a hint, and garbage created inside the loops may or
 * may not have been collected when the final reading is taken.
 */
@Ignore("long time")
@Test
public void testMemory() throws Exception {
    int times = 5000000;

    // Baseline is taken before construction so that anything the remover
    // allocates in its constructor is counted as well.
    System.gc();
    long usedBefore = usedHeapBytes();
    DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times,0.005);
    long time = System.currentTimeMillis();
    for (int i = 0; i < times; i++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
    }
    System.out.println("Time used by bloomfilter:" + (System.currentTimeMillis() - time));
    System.out.println("Memory used by bloomfilter:" + (usedHeapBytes() - usedBefore));

    // Drop the Bloom filter before the second baseline so it does not
    // inflate the starting point of the HashSet run.
    duplicateRemover = null;
    System.gc();
    usedBefore = usedHeapBytes();
    duplicateRemover = new HashSetDuplicateRemover();
    time = System.currentTimeMillis();
    for (int i = 0; i < times; i++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
    }
    System.out.println("Time used by hashset:" + (System.currentTimeMillis() - time));
    System.out.println("Memory used by hashset:" + (usedHeapBytes() - usedBefore));
}

/** Returns the number of heap bytes currently in use (allocated minus free). */
private static long usedHeapBytes() {
    Runtime runtime = Runtime.getRuntime();
    return runtime.totalMemory() - runtime.freeMemory();
}
/**
 * Manual accuracy check (ignored by default because it is slow): offers each
 * of a series of distinct URLs to a {@link BloomFilterDuplicateRemover} twice
 * and prints how the answers were distributed.
 *
 * <ul>
 *   <li>"Right": first offer of a URL answered "not duplicate".</li>
 *   <li>"Wrong": first offer of a URL answered "duplicate".</li>
 *   <li>"Miss check": second offer of the same URL answered "not duplicate".</li>
 * </ul>
 *
 * Nothing is asserted; the counters are only printed.
 */
@Ignore("long time")
@Test
public void testMissHit() throws Exception {
    final int total = 5000000;
    DuplicateRemover remover = new BloomFilterDuplicateRemover(total, 0.01);

    int acceptedFirst = 0;
    int rejectedFirst = 0;
    int acceptedSecond = 0;

    int index = 0;
    while (index < total) {
        String url = String.valueOf(index);

        // First offer of this URL.
        if (remover.isDuplicate(new Request(url), null)) {
            rejectedFirst++;
        } else {
            acceptedFirst++;
        }

        // Second offer of the very same URL, wrapped in a fresh Request.
        boolean flaggedAgain = remover.isDuplicate(new Request(url), null);
        if (!flaggedAgain) {
            acceptedSecond++;
        }

        index++;
    }

    System.out.println("Right count: " + acceptedFirst
            + " Wrong count: " + rejectedFirst
            + " Miss check: " + acceptedSecond);
}
}