1
\$\begingroup\$

(See the previous version.)

Now I have this:

com.github.coderodde.util.IntHashSet:

package com.github.coderodde.util;

import java.util.HashSet;
import java.util.Random;
import java.util.Set;

/**
 * This class implements a simple hash set for non-negative {@code int} values.
 * It is used in the {@link com.github.coderodde.util.LinkedList} in order to 
 * keep track of nodes that are being pointed to by fingers.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.6 (Aug 29, 2021)
 * @since 1.6 (Aug 29, 2021)
 */
public class IntHashSet {

    private static final int INITIAL_CAPACITY = 8;

    private static final class Node {
        Node next;
        int integer;

        Node(int integer, Node next) {
            this.integer = integer;
            this.next = next;
        }

        @Override
        public String toString() {
            return "Chain node, integer = " + integer;
        }
    }

    private Node[] table = new Node[INITIAL_CAPACITY];
    private int size = 0;
    private int mask = INITIAL_CAPACITY - 1;
    
    @Override
    public String toString() {
        return "size = " + size;
    }

    public void add(int integer) {
        int targetCollisionChainIndex = integer & mask;
        Node node = table[targetCollisionChainIndex];
        
        while (node != null) {
            if (node.integer == integer) {
                return;
            }
            
            node = node.next;
        }
        
        size++;
        
        if (size > table.length) {
            Node[] newTable = new Node[2 * table.length];
            mask = newTable.length - 1;
            
            for (Node currentNode : table) {
                while (currentNode != null) {
                    Node nextNode = currentNode.next;
                    
                    int newTableHash = currentNode.integer & mask;
                    currentNode.next = newTable[newTableHash];
                    newTable[newTableHash] = currentNode;
                    
                    currentNode = nextNode;
                }
            }
            
            table = newTable;
            targetCollisionChainIndex = integer & mask;
        }
        
        Node newNode = new Node(integer, table[targetCollisionChainIndex]);
        table[targetCollisionChainIndex] = newNode;
    }

    public boolean contains(int integer) {
        final int collisionChainIndex = integer & mask;
        Node node = table[collisionChainIndex];

        while (node != null) {
            if (node.integer == integer) {
                return true;
            }

            node = node.next;
        }

        return false;
    }

    public void remove(int integer) {
        int targetCollisionChainIndex = integer & mask;
        Node node = table[targetCollisionChainIndex];
        Node prev = null;
        
        while (node != null) {
            if (node.integer == integer) {
                break;
            }
            
            prev = node;
            node = node.next;
        }
        
        if (node == null) 
            return;

        size--;
        
        if (size * 4 <= table.length && table.length >= INITIAL_CAPACITY * 4) {
            Node[] newTable = new Node[table.length / 4];
            mask = newTable.length - 1;
            
            for (Node currentNode : table) {
                while (currentNode != null) {
                    if (currentNode == node) {
                        // Omit the node with the target integer:
                        currentNode = currentNode.next;
                        continue;
                    }
                    
                    Node nextNode = currentNode.next;
                    
                    int newTableHash = currentNode.integer & mask;
                    currentNode.next = newTable[newTableHash];
                    newTable[newTableHash] = currentNode;
                    
                    currentNode = nextNode;
                }
            }
            
            table = newTable;
        } else  if (prev == null) {
            table[targetCollisionChainIndex] = 
                    table[targetCollisionChainIndex].next;
        } else {
            prev.next = prev.next.next;
        }
    }

    public void clear() {
         size = 0;
         table = new Node[INITIAL_CAPACITY];
         mask = table.length - 1;
    }
    
    private static final int DATA_LENGTH = 5_000_000;
    

    
    public static void main(String[] args) {
        Random random = new Random(10L);
        
        int[] addData      = getAddData(random);
        int[] containsData = getContainsData(random);
        int[] removeData   = getRemoveData(random);
        
        for (int iter = 0; iter < 5; iter++) {
            System.out.println(">>> Iteration: " + (iter + 1) + "/5");
            
            IntHashSet myset = new IntHashSet();
            Set<Integer> set = new HashSet<>();

            long start = System.currentTimeMillis();
            for (int i : addData) {
                myset.add(i);
            }
            long end = System.currentTimeMillis();

            System.out.println("    IntHashSet.add in " + (end - start));

            start = System.currentTimeMillis();
            for (int i : addData) {
                set.add(i);
            }
            end = System.currentTimeMillis();

            System.out.println("    HashSet.add in " + (end - start) + "\n");

            start = System.currentTimeMillis();
            for (int i : containsData) {
                myset.contains(i);
            }
            end = System.currentTimeMillis();

            System.out.println("    IntHashSet.contains in " + (end - start));

            start = System.currentTimeMillis();
            for (int i : containsData) {
                set.contains(i);
            }
            end = System.currentTimeMillis();

            System.out.println("    HashSet.contains in " + (end - start) + 
                    "\n");

            start = System.currentTimeMillis();
            for (int i : removeData) {
                myset.remove(i);
            }
            end = System.currentTimeMillis();

            System.out.println("    IntHashSet.remove in " + (end - start));

            start = System.currentTimeMillis();
            for (int i : removeData) {
                set.remove(i);
            }
            end = System.currentTimeMillis();

            System.out.println("    HashSet.remove in " + (end - start) + "\n");
        }
    }
        
    private static int[] getAddData(Random random) {
        return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
    }
        
    private static int[] getContainsData(Random random) {
        return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
    }
        
    private static int[] getRemoveData(Random random) {
        return getData(DATA_LENGTH, 3 * DATA_LENGTH / 2, random);
    }
    
    private static int[] getData(int length, int maxValue, Random random) {
        int[] data = new int[length];
        
        for (int i = 0; i < length; i++) {
            data[i] = random.nextInt(maxValue + 1);
        }
        
        return data;
    }
}

com.github.coderodde.util.IntHashSetTest:

package com.github.coderodde.util;

import java.util.Random;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;

public class IntHashSetTest {

    private final IntHashSet set = new IntHashSet();

    @Before
    public void beforeTest() {
        set.clear();
    }
    
    @Test
    public void removeFromCollisionChainBug() {
        bar("removeFromCollisionChainBug");
        
        set.add(0b00001); // 1
        set.add(0b01001); // 9
        set.add(0b10001); // 17
        
        set.remove(1); // remove from tail
        
        set.add(0b00001); // 1
        set.add(0b01001); // 9
        set.add(0b10001); // 17
        
        set.remove(1); // remove from head
        
        set.add(0b00001); // 1
        set.add(0b01001); // 9
        set.add(0b10001); // 17
    
        set.remove(17); // remove from middle
        
        bar("removeFromCollisionChainBug done!");
    }
    
    
    @Test
    public void removeBug() {
        bar("removeBug");
        
        for (int i = 0; i < 9; i++) 
            set.add(i);
        
        for (int i = 0; i < 9; i++) 
            set.remove(i);
        
        bar("removeBug done!");
    }
    
    @Test
    public void removeFirstMiddleLast() {
        bar("removeFirstMiddleLast");
        
        // All three ints will end up in the same collision chain:
        set.add(1);  // 0b00001
        set.add(9);  // 0b01001
        set.add(17); // 0b10001
        
        assertTrue(set.contains(1));
        assertTrue(set.contains(9));
        assertTrue(set.contains(17));
        
        set.remove(1);
        
        assertFalse(set.contains(1));
        assertTrue(set.contains(9));
        assertTrue(set.contains(17));
        
        set.remove(17);
        
        assertFalse(set.contains(1));
        assertTrue(set.contains(9));
        assertFalse(set.contains(17));
        
        set.remove(9);
        
        assertFalse(set.contains(1));
        assertFalse(set.contains(9));
        assertFalse(set.contains(17));
        
        bar("removeFirstMiddleLast done!");
    }

    @Test
    public void add() {
        bar("add");
        
        for (int i = 0; i < 500; i++) {
            set.add(i);
        }

        for (int i = 0; i < 500; i++) {
            assertTrue(set.contains(i));
        }

        for (int i = 500; i < 1_000; i++) {
            assertFalse(set.contains(i));
        }

        for (int i = 450; i < 550; i++) {
            set.remove(i);
        }

        for (int i = 450; i < 1_000; i++) {
            assertFalse(set.contains(i));
        }

        for (int i = 0; i < 450; i++) {
            assertTrue(set.contains(i));
        }
        
        bar("add done!");
    }

    @Test
    public void contains() {
        bar("contains");
        
        set.add(10);
        set.add(20);
        set.add(30);

        for (int i = 1; i < 40; i++) {
            if (i % 10 == 0) {
                assertTrue(set.contains(i));
            } else {
                assertFalse(set.contains(i));
            }
        }
        
        bar("contains done!");
    }

    @Test
    public void remove() {
        bar("remove");
        
        set.add(1);
        set.add(2);
        set.add(3);
        set.add(4);
        set.add(5);

        set.remove(2);
        set.remove(4);

        set.add(2);

        assertFalse(set.contains(4));

        assertTrue(set.contains(1));
        assertTrue(set.contains(2));
        assertTrue(set.contains(3));
        assertTrue(set.contains(5));
        
        bar("remove done!");
    }

    @Test
    public void clear() {
        bar("clear");
        
        for (int i = 0; i < 100; i++) {
            set.add(i);
        }

        for (int i = 0; i < 100; i++) {
            assertTrue(set.contains(i));
        }

        set.clear();

        for (int i = 0; i < 100; i++) {
            assertFalse(set.contains(i));
        }
        
        bar("clear done!");
    }

    @Test 
    public void bruteForceAdd() {
        bar("bruteForceAdd");
        
        Random random = new Random(13L);

        int[] data = new int[10_000];

        for (int i = 0; i < data.length; i++) {
            int datum = random.nextInt(5_000);
            data[i] = datum;
            set.add(datum);
        }

        for (int i = 0; i < data.length; i++) {
            assertTrue(set.contains(data[i]));
        }
        
        bar("bruteForceAdd done!");
    }

    @Test
    public void bruteForceRemove() {
        bar("bruteForceRemove");
        
        Random random = new Random(100L);

        int[] data = new int[10_000];

        for (int i = 0; i < data.length; i++) {
            int datum = random.nextInt(5_000);
            data[i] = datum;
            set.add(datum);
        }

        shuffle(data, random);

        for (int i = 0; i < data.length; i++) {
            int datum = data[i];

            if (set.contains(datum)) {
                set.remove(datum);
                if (set.contains(datum)) 
                    System.out.println("found i = " + i);
            } 

            assertFalse(set.contains(datum));
        }
        
        bar("bruteForceRemove done!");
    }

    private static void shuffle(int[] data, Random random) {
        for (int i = data.length - 1; i > 0; --i) {
            final int j = random.nextInt(i + 1);
            swap(data, i, j);
        }
    }

    private static void swap(int[] data, int i, int j) {
        int tmp = data[i];
        data[i] = data[j];
        data[j] = tmp;
    }
   
    private static void bar(String text) {
        System.out.println("--- " + text);
    }
}

Performance figures:

>>> Iteration: 1/5
    IntHashSet.add in 818
    HashSet.add in 1306

    IntHashSet.contains in 150
    HashSet.contains in 321

    IntHashSet.remove in 250
    HashSet.remove in 263

>>> Iteration: 2/5
    IntHashSet.add in 607
    HashSet.add in 1130

    IntHashSet.contains in 151
    HashSet.contains in 280

    IntHashSet.remove in 179
    HashSet.remove in 203

>>> Iteration: 3/5
    IntHashSet.add in 577
    HashSet.add in 1060

    IntHashSet.contains in 159
    HashSet.contains in 292

    IntHashSet.remove in 189
    HashSet.remove in 229

>>> Iteration: 4/5
    IntHashSet.add in 522
    HashSet.add in 891

    IntHashSet.contains in 166
    HashSet.contains in 316

    IntHashSet.remove in 193
    HashSet.remove in 233

>>> Iteration: 5/5
    IntHashSet.add in 665
    HashSet.add in 940

    IntHashSet.contains in 160
    HashSet.contains in 349

    IntHashSet.remove in 199
    HashSet.remove in 232

Critique request

Please, tell me anything that comes to mind! ^^

\$\endgroup\$
2
  • 1
    \$\begingroup\$ I just wonder why you did not update your previous version? You can change the question and improve the code as long as it has no answers. \$\endgroup\$
    – Martin R
    Commented Sep 2, 2021 at 8:48
  • \$\begingroup\$ This one should not contain bugs. \$\endgroup\$
    – coderodde
    Commented Sep 4, 2021 at 3:04

1 Answer 1

1
\$\begingroup\$

Main Code

Readability

For me, whilst you've addressed some bugs since you're initial version it looks like the code structure within your IntHashSet has moved in a less readable direction. Whilst you started out having a contract method which was responsible for reducing the size of the table now all of this logic sits within your remove method. This makes the remove method quite large. Instead, I'd have preferred the introduction of additional methods to help break the code up.

For example, one way to remove the need for comments is to add a new method to give a piece of functionality a name. So rather than:

if (currentNode != node) {
    // Omit the node with the target integer:

Perhaps this would be more descriptive

if (isNodeToBeRemoved(currentNode, nodeToBeRemoved)) {

Continue

I generally don't mind using continue, however often the code can be rearranged to remove the need for it. Sometimes this can make the code easier to read. So, rather than:

while (currentNode != null) {
    if (currentNode == node) {
        // Omit the node with the target integer:
        currentNode = currentNode.next;
        continue;
    }

    Node nextNode = currentNode.next;

    int newTableHash = currentNode.integer & mask;
    currentNode.next = newTable[newTableHash];
    newTable[newTableHash] = currentNode;

    currentNode = nextNode;
}

Consider:

while (currentNode != null) {
    Node nextNode = currentNode.next;

    if (notNodeBeingRemoved(currentNode, nodeToRemove)) {
        int newTableHash = currentNode.integer & mask;
        currentNode.next = newTable[newTableHash];
        newTable[newTableHash] = currentNode;
    }

    currentNode = nextNode;
}

I'd probably go further and extract a method addToTable(newTable, currentNode) for:

int newTableHash = currentNode.integer & mask;
currentNode.next = newTable[newTableHash];
newTable[newTableHash] = currentNode;

Duplication

The addToTable logic above is an example of a section of duplicate logic within the code. There's also some duplication between add and contains. Having add call contains would be a simple change. There's also similar logic in remove. Adding something like a private findNodeAndPrev method which performed the search and then returned the node containing the target item and its predecessor would allow the logic to be shared between all three methods, rather than being duplicated, although this might introduce unnecessary complexity.

Testing

Test Noise

All of your tests start and end with calls to bar. This seems really tedious, error prone and adds noise to the actual test code, for very little value. Consider removing them. If you really need them to be there, then whilst I haven't tested it there seems to be a way of doing this more generically using a TestWatcher/Watchman in Junit 4. Alternately, with JUnit 5 you can create a simple extension that allows you to do this sort of thing:

@ExtendWith(IntHashSetTest.InvocationLogger.class)
public class IntHashSetTest {

    static class InvocationLogger implements BeforeEachCallback, AfterEachCallback {

        @Override
        public void afterEach(ExtensionContext extensionContext) throws Exception {
            bar(extensionContext.getDisplayName());
        }

        @Override
        public void beforeEach(ExtensionContext extensionContext) throws Exception {
            bar(extensionContext.getDisplayName() + " done!");
        }

        private static void bar(String text) {
            System.out.println("--- " + text);
        }
    }

Testing What?

I found it very difficult to tell just from the names what it was your tests were testing. Names like removeBug really don't say anything. Looking at the evolution of the code, it looks like it's testing if contracting works as expected when reducing the size of the set to 0. There's no assertions so looking at the code, it looks like it's saying can I put some stuff in, and take it back out again without an exception being thrown.

Coupling

There is a high degree of coupling between your tests and the code under test. This can be necessary, however typically the way it would work is that changes to the code break the tests where as several of your tests have hidden coupling. Some tests for boundary cases assume a known initial size for the set. If that initial size is changed then the conditions that the tests are attempting to test don't occur. So, for example removeFromCollisionChainBug assumes that all of the items added are added to the same bucket. If I doubled the initial size, this would no longer be the case. Whilst the test continues to pass, it's no longer testing the same thing. When you have tests that need to be coupled to the implementation consider making this coupling explicit, so that the test can continue to work as expected, or break. An obvious way to do that here would be to add an overloaded constructor to allow the initial tablesize to be passed into your set.

Testing too much

When tests are focused on testing one thing, they're usually more straightforward and easier to understand. Adding extra assertions might seem like a good idea, but if they're not focused on the purpose of the test then they just add noise and obscure the core intention of what's being tested.

So, in your add test for example, after you add the items and assert that they have been added to the set you then loop asserting that the next 500 numbers aren't in the set.

for (int i = 500; i < 1_000; i++) {
    assertFalse(set.contains(i));
}

Does this really add value? Why stop at 1000? The test then goes on to remove items and validate that the removal has worked... this seems like a totally different test.

\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.