001/******************************************************************************
002 Copyright 2011 See AUTHORS file.
003
004 Licensed under the Apache License, Version 2.0 (the "License");
005 you may not use this file except in compliance with the License.
006 You may obtain a copy of the License at
007
008 http://www.apache.org/licenses/LICENSE-2.0
009
010 Unless required by applicable law or agreed to in writing, software
011 distributed under the License is distributed on an "AS IS" BASIS,
012 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 See the License for the specific language governing permissions and
014 limitations under the License.
015 */
016
017package squidpony.squidmath;
018
019import java.io.Serializable;
020import java.util.Arrays;
021import java.util.NoSuchElementException;
022
023/** An unordered set where the items are unboxed ints. No allocation is done except when growing the table size.
024 * <p>
025 * This class performs fast contains and remove (typically O(1), worst case O(n) but that is rare in practice). Add may be
026 * slightly slower, depending on hash collisions. Hashcodes are rehashed to reduce collisions and the need to resize. Load factors
027 * greater than 0.91 greatly increase the chances to resize to the next higher POT size.
028 * <p>
029 * Unordered sets and maps are not designed to provide especially fast iteration. Iteration is faster with OrderedSet,
030 * OrderedMap, and for primitive-valued keys, IntIntOrderedMap and IntDoubleOrderedMap.
031 * <p>
032 * This implementation uses linear probing with the backward shift algorithm for removal. Hashcodes are rehashed using Fibonacci
033 * hashing, instead of the more common power-of-two mask, to better distribute poor hashCodes (see <a href=
034 * "https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/">Malte
035 * Skarupke's blog post</a>). Linear probing continues to work even when all hashCodes collide, just more slowly.
036 * <br>
037 * This class is based on libGDX 1.9.11-SNAPSHOT's IntSet class, and fully replaces the older ShortSet class within
038 * SquidLib's usage. Porting older code that uses ShortSet should be a little easier due to the {@link #addAll(short[])}
039 * and {@link #random(IRNG)} methods. ShortSet used the somewhat-problematic cuckoo-hashing technique, where this uses
040 * the more standard linear probing (like {@link OrderedSet}).
041 * @author Nathan Sweet
042 * @author Tommy Ettinger */
043public class IntSet implements Serializable {
044        private static final long serialVersionUID = 1L;
045
046        public int size;
047
048        int[] keyTable;
049        boolean hasZeroValue;
050
051        private final float loadFactor;
052        private int threshold;
053
054        /** Used by {@link #place(int)} to bit shift the upper bits of a {@code long} into a usable range (&gt;= 0 and &lt;=
055         * {@link #mask}). The shift can be negative, which is convenient to match the number of bits in mask: if mask is a 7-bit
056         * number, a shift of -7 shifts the upper 7 bits into the lowest 7 positions. This class sets the shift &gt; 32 and &lt; 64,
057         * which if used with an int will still move the upper bits of an int to the lower bits due to Java's implicit modulus on
058         * shifts.
059         * <p>
060         * {@link #mask} can also be used to mask the low bits of a number, which may be faster for some hashcodes, if
061         * {@link #place(int)} is overridden. */
062        protected int shift;
063
064        /** A bitmask used to confine hashcodes to the size of the table. Must be all 1 bits in its low positions, ie a power of two
065         * minus 1. If {@link #place(int)} is overriden, this can be used instead of {@link #shift} to isolate usable bits of a
066         * hash. */
067        protected int mask;
068
069        private IntSetIterator iterator1, iterator2;
070
071        /** Creates a new set with an initial capacity of 51 and a load factor of 0.8. */
072        public IntSet () {
073                this(51, 0.8f);
074        }
075
076        /** Creates a new set with a load factor of 0.8.
077         * @param initialCapacity If not a power of two, it is increased to the next nearest power of two. */
078        public IntSet (int initialCapacity) {
079                this(initialCapacity, 0.8f);
080        }
081
082        /** Creates a new set with the specified initial capacity and load factor. This set will hold initialCapacity items before
083         * growing the backing table.
084         * @param initialCapacity If not a power of two, it is increased to the next nearest power of two. */
085        public IntSet (int initialCapacity, float loadFactor) {
086                if (loadFactor <= 0f || loadFactor >= 1f)
087                        throw new IllegalArgumentException("loadFactor must be > 0 and < 1: " + loadFactor);
088                this.loadFactor = loadFactor;
089
090                int tableSize = tableSize(initialCapacity, loadFactor);
091                threshold = (int)(tableSize * loadFactor);
092                mask = tableSize - 1;
093                shift = Long.numberOfLeadingZeros(mask);
094
095                keyTable = new int[tableSize];
096        }
097
098        /** Creates a new set identical to the specified set. */
099        public IntSet (IntSet set) {
100                this((int)(set.keyTable.length * set.loadFactor), set.loadFactor);
101                System.arraycopy(set.keyTable, 0, keyTable, 0, set.keyTable.length);
102                size = set.size;
103                hasZeroValue = set.hasZeroValue;
104        }
105
106        /** Returns an index between 0 (inclusive) and {@link #mask} (inclusive) for the specified {@code item}.
107         * <p>
108         * The default implementation uses Fibonacci hashing on the item's {@link Object#hashCode()}: the hashcode is multiplied by a
109         * long constant (2 to the 64th, divided by the golden ratio) then the uppermost bits are shifted into the lowest positions to
110         * obtain an index in the desired range. Multiplication by a long may be slower than int (eg on GWT) but greatly improves
111         * rehashing, allowing even very poor hashcodes, such as those that only differ in their upper bits, to be used without high
112         * collision rates. Fibonacci hashing has increased collision rates when all or most hashcodes are multiples of larger
113         * Fibonacci numbers (see <a href=
114         * "https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/">Malte
115         * Skarupke's blog post</a>).
116         * <p>
117         * This method can be overriden to customizing hashing. This may be useful eg in the unlikely event that most hashcodes are
118         * Fibonacci numbers, if keys provide poor or incorrect hashcodes, or to simplify hashing if keys provide high quality
119         * hashcodes and don't need Fibonacci hashing: {@code return item.hashCode() & mask;} */
120        protected int place (int item) {
121                return (int)(item * 0x9E3779B97F4A7C15L >>> shift);
122        }
123
124        /** Returns the index of the key if already present, else -(index + 1) for the next empty index. This can be overridden in this
125         * pacakge to compare for equality differently than {@link Object#equals(Object)}. */
126        private int locateKey (int key) {
127                int[] keyTable = this.keyTable;
128                for (int i = place(key);; i = i + 1 & mask) {
129                        int other = keyTable[i];
130                        if (other == 0) return -(i + 1); // Empty space is available.
131                        if (other == key) return i; // Same key was found.
132                }
133        }
134
135        /** Returns true if the key was not already in the set. */
136        public boolean add (int key) {
137                if (key == 0) {
138                        if (hasZeroValue) return false;
139                        hasZeroValue = true;
140                        size++;
141                        return true;
142                }
143                int i = locateKey(key);
144                if (i >= 0) return false; // Existing key was found.
145                i = -(i + 1); // Empty space was found.
146                keyTable[i] = key;
147                if (++size >= threshold) resize(keyTable.length << 1);
148                return true;
149        }
150
151        public void addAll (IntVLA array) {
152                addAll(array.items, 0, array.size);
153        }
154
155        public void addAll (IntVLA array, int offset, int length) {
156                if (offset + length > array.size)
157                        throw new IllegalArgumentException("offset + length must be <= size: " + offset + " + " + length + " <= " + array.size);
158                addAll(array.items, offset, length);
159        }
160
161        public void addAll (int... array) {
162                addAll(array, 0, array.length);
163        }
164
165        public void addAll (int[] array, int offset, int length) {
166                ensureCapacity(length);
167                for (int i = offset, n = i + length; i < n; i++)
168                        add(array[i]);
169        }
170
171        public void addAll (short[] array) {
172                addAll(array, 0, array.length);
173        }
174
175        public void addAll (short[] array, int offset, int length) {
176                ensureCapacity(length);
177                for (int i = offset, n = i + length; i < n; i++)
178                        add(array[i]);
179        }
180
181        public void addAll (IntSet set) {
182                ensureCapacity(set.size);
183                if (set.hasZeroValue) add(0);
184                int[] keyTable = set.keyTable;
185                for (int i = 0, n = keyTable.length; i < n; i++) {
186                        int key = keyTable[i];
187                        if (key != 0) add(key);
188                }
189        }
190
191        /** Skips checks for existing keys, doesn't increment size, doesn't need to handle key 0. */
192        private void addResize (int key) {
193                int[] keyTable = this.keyTable;
194                for (int i = place(key);; i = (i + 1) & mask) {
195                        if (keyTable[i] == 0) {
196                                keyTable[i] = key;
197                                return;
198                        }
199                }
200        }
201
202        /** Returns true if the key was removed. */
203        public boolean remove (int key) {
204                if (key == 0) {
205                        if (!hasZeroValue) return false;
206                        hasZeroValue = false;
207                        size--;
208                        return true;
209                }
210
211                int i = locateKey(key);
212                if (i < 0) return false;
213                int[] keyTable = this.keyTable;
214                int mask = this.mask, next = i + 1 & mask;
215                while ((key = keyTable[next]) != 0) {
216                        int placement = place(key);
217                        if ((next - placement & mask) > (i - placement & mask)) {
218                                keyTable[i] = key;
219                                i = next;
220                        }
221                        next = next + 1 & mask;
222                }
223                keyTable[i] = 0;
224                size--;
225                return true;
226        }
227
228        /** Returns true if the set has one or more items. */
229        public boolean notEmpty () {
230                return size > 0;
231        }
232
233        /** Returns true if the set is empty. */
234        public boolean isEmpty () {
235                return size == 0;
236        }
237
238        /** Reduces the size of the backing arrays to be the specified capacity / loadFactor, or less. If the capacity is already less,
239         * nothing is done. If the set contains more items than the specified capacity, the next highest power of two capacity is used
240         * instead. */
241        public void shrink (int maximumCapacity) {
242                if (maximumCapacity < 0) throw new IllegalArgumentException("maximumCapacity must be >= 0: " + maximumCapacity);
243                int tableSize = tableSize(maximumCapacity, loadFactor);
244                if (keyTable.length > tableSize) resize(tableSize);
245        }
246
247        /** Clears the set and reduces the size of the backing arrays to be the specified capacity / loadFactor, if they are larger. */
248        public void clear (int maximumCapacity) {
249                int tableSize = tableSize(maximumCapacity, loadFactor);
250                if (keyTable.length <= tableSize) {
251                        clear();
252                        return;
253                }
254                size = 0;
255                hasZeroValue = false;
256                resize(tableSize);
257        }
258
259        public void clear () {
260                if (size == 0) return;
261                size = 0;
262                Arrays.fill(keyTable, 0);
263                hasZeroValue = false;
264        }
265
266        public boolean contains (int key) {
267                if (key == 0) return hasZeroValue;
268                return locateKey(key) >= 0;
269        }
270
271        public int first () {
272                if (hasZeroValue) return 0;
273                int[] keyTable = this.keyTable;
274                for (int i = 0, n = keyTable.length; i < n; i++)
275                        if (keyTable[i] != 0) return keyTable[i];
276                throw new IllegalStateException("IntSet is empty.");
277        }
278        /**
279         * Gets a random int from this IntSet, using the given {@link IRNG} to generate random values.
280         * If this IntSet is empty, throws an UnsupportedOperationException. This method operates in linear time, unlike
281         * the random item retrieval methods in {@link OrderedSet} and {@link OrderedMap}, which take constant time.
282         * @param rng an {@link IRNG}, such as {@link RNG} or {@link GWTRNG}
283         * @return a random int from this IntSet
284         */
285        public int random(IRNG rng) {
286                if (size <= 0) {
287                        throw new UnsupportedOperationException("IntSet cannot be empty when getting a random element");
288                }
289                int n = rng.nextInt(size);
290                int i = 0;
291                IntSetIterator isi = iterator();
292                while (n-- >= 0 && isi.hasNext)
293                        i = isi.next();
294                isi.reset();
295                return i;
296        }
297
298        /** Increases the size of the backing array to accommodate the specified number of additional items / loadFactor. Useful before
299         * adding many items to avoid multiple backing array resizes. */
300        public void ensureCapacity (int additionalCapacity) {
301                int tableSize = tableSize(size + additionalCapacity, loadFactor);
302                if (keyTable.length < tableSize) resize(tableSize);
303        }
304
305        private void resize (int newSize) {
306                int oldCapacity = keyTable.length;
307                threshold = (int)(newSize * loadFactor);
308                mask = newSize - 1;
309                shift = Long.numberOfLeadingZeros(mask);
310
311                int[] oldKeyTable = keyTable;
312
313                keyTable = new int[newSize];
314
315                if (size > 0) {
316                        for (int i = 0; i < oldCapacity; i++) {
317                                int key = oldKeyTable[i];
318                                if (key != 0) addResize(key);
319                        }
320                }
321        }
322
323        public int hashCode () {
324                int h = size;
325                int[] keyTable = this.keyTable;
326                for (int i = 0, n = keyTable.length; i < n; i++) {
327                        int key = keyTable[i];
328                        if (key != 0) h += key;
329                }
330                return h;
331        }
332
333        public boolean equals (Object obj) {
334                if (!(obj instanceof IntSet)) return false;
335                IntSet other = (IntSet)obj;
336                if (other.size != size) return false;
337                if (other.hasZeroValue != hasZeroValue) return false;
338                int[] keyTable = this.keyTable;
339                for (int i = 0, n = keyTable.length; i < n; i++)
340                        if (keyTable[i] != 0 && !other.contains(keyTable[i])) return false;
341                return true;
342        }
343
344        public String toString () {
345                if (size == 0) return "[]";
346                java.lang.StringBuilder buffer = new java.lang.StringBuilder(32);
347                buffer.append('[');
348                int[] keyTable = this.keyTable;
349                int i = keyTable.length;
350                if (hasZeroValue)
351                        buffer.append("0");
352                else {
353                        while (i-- > 0) {
354                                int key = keyTable[i];
355                                if (key == 0) continue;
356                                buffer.append(key);
357                                break;
358                        }
359                }
360                while (i-- > 0) {
361                        int key = keyTable[i];
362                        if (key == 0) continue;
363                        buffer.append(", ");
364                        buffer.append(key);
365                }
366                buffer.append(']');
367                return buffer.toString();
368        }
369
370        /** Returns an iterator for the keys in the set. Remove is supported.
371         * <p>
372         * The same iterator instance is returned each time this method is called.
373         * Use the {@link IntSetIterator} constructor for nested or multithreaded iteration. */
374        public IntSetIterator iterator () {
375                if (iterator1 == null) {
376                        iterator1 = new IntSetIterator(this);
377                        iterator2 = new IntSetIterator(this);
378                }
379                if (!iterator1.valid) {
380                        iterator1.reset();
381                        iterator1.valid = true;
382                        iterator2.valid = false;
383                        return iterator1;
384                }
385                iterator2.reset();
386                iterator2.valid = true;
387                iterator1.valid = false;
388                return iterator2;
389        }
390
391        static public IntSet with (int... array) {
392                IntSet set = new IntSet(array.length);
393                set.addAll(array);
394                return set;
395        }
396
397        static public IntSet with (IntVLA array) {
398                IntSet set = new IntSet(array.size);
399                set.addAll(array);
400                return set;
401        }
402
403        /**
404         * Allocates a new in array and fills it with the contents of this IntSet.
405         * @return a new int array containing all the int items of this IntSet.
406         */
407        public int[] toArray(){
408                int[] array = new int[size];
409                IntSetIterator it = iterator();
410                it.reset();
411                int i = 0;
412                while (it.hasNext)
413                        array[i++] = it.next();
414                return array;
415        }
416
417        /**
418         * Fills an existing (non-null) int array with as many items from this IntSet as will fit in the array, until either
419         * the array is full or the IntSet has no more items to offer.
420         * @param array a non-null int array; can have a length that is the same as, more than or less than {@link #size}
421         * @return {@code array}, after adding items from this
422         */
423        public int[] toArray(int[] array){
424                IntSetIterator it = iterator();
425                it.reset();
426                int i = 0;
427                while (it.hasNext && i < array.length)
428                        array[i++] = it.next();
429                return array;
430        }
431
432        /**
433         * Appends to an existing (non-null) {@link IntVLA} with all the int items in this IntSet.
434         * @param array a non-null IntVLA; no items will be removed, and the contents of this will be added at the end
435         * @return {@code array}, after adding items from this
436         */
437        public IntVLA appendInto(IntVLA array){
438                IntSetIterator it = iterator();
439                it.reset();
440                while (it.hasNext)
441                        array.add(it.next());
442                return array;
443        }
444
445        static public class IntSetIterator {
446                static private final int INDEX_ILLEGAL = -2, INDEX_ZERO = -1;
447
448                public boolean hasNext;
449
450                final IntSet set;
451                int nextIndex, currentIndex;
452                boolean valid = true;
453
454                public IntSetIterator (IntSet set) {
455                        this.set = set;
456                        reset();
457                }
458
459                public void reset () {
460                        currentIndex = INDEX_ILLEGAL;
461                        nextIndex = INDEX_ZERO;
462                        if (set.hasZeroValue)
463                                hasNext = true;
464                        else
465                                findNextIndex();
466                }
467
468                void findNextIndex () {
469                        int[] keyTable = set.keyTable;
470                        for (int n = keyTable.length; ++nextIndex < n;) {
471                                if (keyTable[nextIndex] != 0) {
472                                        hasNext = true;
473                                        return;
474                                }
475                        }
476                        hasNext = false;
477                }
478
479                public void remove () {
480                        int i = currentIndex;
481                        if (i == INDEX_ZERO && set.hasZeroValue) {
482                                set.hasZeroValue = false;
483                        } else if (i < 0) {
484                                throw new IllegalStateException("next must be called before remove.");
485                        } else {
486                                int[] keyTable = set.keyTable;
487                                int mask = set.mask, next = i + 1 & mask, key;
488                                while ((key = keyTable[next]) != 0) {
489                                        int placement = set.place(key);
490                                        if ((next - placement & mask) > (i - placement & mask)) {
491                                                keyTable[i] = key;
492                                                i = next;
493                                        }
494                                        next = next + 1 & mask;
495                                }
496                                keyTable[i] = 0;
497                                if (i != currentIndex) --nextIndex;
498                        }
499                        currentIndex = INDEX_ILLEGAL;
500                        set.size--;
501                }
502
503                public int next () {
504                        if (!hasNext) throw new NoSuchElementException();
505                        if (!valid) throw new IllegalStateException("#iterator() cannot be used nested.");
506                        int key = nextIndex == INDEX_ZERO ? 0 : set.keyTable[nextIndex];
507                        currentIndex = nextIndex;
508                        findNextIndex();
509                        return key;
510                }
511
512                /** Returns a new array containing the remaining keys. */
513                public IntVLA toArray () {
514                        IntVLA array = new IntVLA(set.size);
515                        while (hasNext)
516                                array.add(next());
517                        return array;
518                }
519                public IntVLA toArray (IntVLA array) {
520                        while (hasNext)
521                                array.add(next());
522                        return array;
523                }
524        }
525
526        static int tableSize (int capacity, float loadFactor) {
527                if (capacity < 0) throw new IllegalArgumentException("capacity must be >= 0: " + capacity);
528                int tableSize = HashCommon.nextPowerOfTwo(Math.max(2, (int)Math.ceil(capacity / loadFactor)));
529                if (tableSize > 1 << 30) throw new IllegalArgumentException("The required capacity is too large: " + capacity);
530                return tableSize;
531        }
532}