001/****************************************************************************** 002 Copyright 2011 See AUTHORS file. 003 004 Licensed under the Apache License, Version 2.0 (the "License"); 005 you may not use this file except in compliance with the License. 006 You may obtain a copy of the License at 007 008 http://www.apache.org/licenses/LICENSE-2.0 009 010 Unless required by applicable law or agreed to in writing, software 011 distributed under the License is distributed on an "AS IS" BASIS, 012 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 See the License for the specific language governing permissions and 014 limitations under the License. 015 */ 016 017package squidpony.squidmath; 018 019import java.io.Serializable; 020import java.util.Arrays; 021import java.util.NoSuchElementException; 022 023/** An unordered set where the items are unboxed ints. No allocation is done except when growing the table size. 024 * <p> 025 * This class performs fast contains and remove (typically O(1), worst case O(n) but that is rare in practice). Add may be 026 * slightly slower, depending on hash collisions. Hashcodes are rehashed to reduce collisions and the need to resize. Load factors 027 * greater than 0.91 greatly increase the chances to resize to the next higher POT size. 028 * <p> 029 * Unordered sets and maps are not designed to provide especially fast iteration. Iteration is faster with OrderedSet, 030 * OrderedMap, and for primitive-valued keys, IntIntOrderedMap and IntDoubleOrderedMap. 031 * <p> 032 * This implementation uses linear probing with the backward shift algorithm for removal. Hashcodes are rehashed using Fibonacci 033 * hashing, instead of the more common power-of-two mask, to better distribute poor hashCodes (see <a href= 034 * "https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/">Malte 035 * Skarupke's blog post</a>). Linear probing continues to work even when all hashCodes collide, just more slowly. 036 * <br> 037 * This class is based on libGDX 1.9.11-SNAPSHOT's IntSet class, and fully replaces the older ShortSet class within 038 * SquidLib's usage. Porting older code that uses ShortSet should be a little easier due to the {@link #addAll(short[])} 039 * and {@link #random(IRNG)} methods. ShortSet used the somewhat-problematic cuckoo-hashing technique, where this uses 040 * the more standard linear probing (like {@link OrderedSet}). 041 * @author Nathan Sweet 042 * @author Tommy Ettinger */ 043public class IntSet implements Serializable { 044 private static final long serialVersionUID = 1L; 045 046 public int size; 047 048 int[] keyTable; 049 boolean hasZeroValue; 050 051 private final float loadFactor; 052 private int threshold; 053 054 /** Used by {@link #place(int)} to bit shift the upper bits of a {@code long} into a usable range (>= 0 and <= 055 * {@link #mask}). The shift can be negative, which is convenient to match the number of bits in mask: if mask is a 7-bit 056 * number, a shift of -7 shifts the upper 7 bits into the lowest 7 positions. This class sets the shift > 32 and < 64, 057 * which if used with an int will still move the upper bits of an int to the lower bits due to Java's implicit modulus on 058 * shifts. 059 * <p> 060 * {@link #mask} can also be used to mask the low bits of a number, which may be faster for some hashcodes, if 061 * {@link #place(int)} is overridden. */ 062 protected int shift; 063 064 /** A bitmask used to confine hashcodes to the size of the table. Must be all 1 bits in its low positions, ie a power of two 065 * minus 1. If {@link #place(int)} is overriden, this can be used instead of {@link #shift} to isolate usable bits of a 066 * hash. */ 067 protected int mask; 068 069 private IntSetIterator iterator1, iterator2; 070 071 /** Creates a new set with an initial capacity of 51 and a load factor of 0.8. */ 072 public IntSet () { 073 this(51, 0.8f); 074 } 075 076 /** Creates a new set with a load factor of 0.8. 077 * @param initialCapacity If not a power of two, it is increased to the next nearest power of two. */ 078 public IntSet (int initialCapacity) { 079 this(initialCapacity, 0.8f); 080 } 081 082 /** Creates a new set with the specified initial capacity and load factor. This set will hold initialCapacity items before 083 * growing the backing table. 084 * @param initialCapacity If not a power of two, it is increased to the next nearest power of two. */ 085 public IntSet (int initialCapacity, float loadFactor) { 086 if (loadFactor <= 0f || loadFactor >= 1f) 087 throw new IllegalArgumentException("loadFactor must be > 0 and < 1: " + loadFactor); 088 this.loadFactor = loadFactor; 089 090 int tableSize = tableSize(initialCapacity, loadFactor); 091 threshold = (int)(tableSize * loadFactor); 092 mask = tableSize - 1; 093 shift = Long.numberOfLeadingZeros(mask); 094 095 keyTable = new int[tableSize]; 096 } 097 098 /** Creates a new set identical to the specified set. */ 099 public IntSet (IntSet set) { 100 this((int)(set.keyTable.length * set.loadFactor), set.loadFactor); 101 System.arraycopy(set.keyTable, 0, keyTable, 0, set.keyTable.length); 102 size = set.size; 103 hasZeroValue = set.hasZeroValue; 104 } 105 106 /** Returns an index between 0 (inclusive) and {@link #mask} (inclusive) for the specified {@code item}. 107 * <p> 108 * The default implementation uses Fibonacci hashing on the item's {@link Object#hashCode()}: the hashcode is multiplied by a 109 * long constant (2 to the 64th, divided by the golden ratio) then the uppermost bits are shifted into the lowest positions to 110 * obtain an index in the desired range. Multiplication by a long may be slower than int (eg on GWT) but greatly improves 111 * rehashing, allowing even very poor hashcodes, such as those that only differ in their upper bits, to be used without high 112 * collision rates. Fibonacci hashing has increased collision rates when all or most hashcodes are multiples of larger 113 * Fibonacci numbers (see <a href= 114 * "https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/">Malte 115 * Skarupke's blog post</a>). 116 * <p> 117 * This method can be overriden to customizing hashing. This may be useful eg in the unlikely event that most hashcodes are 118 * Fibonacci numbers, if keys provide poor or incorrect hashcodes, or to simplify hashing if keys provide high quality 119 * hashcodes and don't need Fibonacci hashing: {@code return item.hashCode() & mask;} */ 120 protected int place (int item) { 121 return (int)(item * 0x9E3779B97F4A7C15L >>> shift); 122 } 123 124 /** Returns the index of the key if already present, else -(index + 1) for the next empty index. This can be overridden in this 125 * pacakge to compare for equality differently than {@link Object#equals(Object)}. */ 126 private int locateKey (int key) { 127 int[] keyTable = this.keyTable; 128 for (int i = place(key);; i = i + 1 & mask) { 129 int other = keyTable[i]; 130 if (other == 0) return -(i + 1); // Empty space is available. 131 if (other == key) return i; // Same key was found. 132 } 133 } 134 135 /** Returns true if the key was not already in the set. */ 136 public boolean add (int key) { 137 if (key == 0) { 138 if (hasZeroValue) return false; 139 hasZeroValue = true; 140 size++; 141 return true; 142 } 143 int i = locateKey(key); 144 if (i >= 0) return false; // Existing key was found. 145 i = -(i + 1); // Empty space was found. 146 keyTable[i] = key; 147 if (++size >= threshold) resize(keyTable.length << 1); 148 return true; 149 } 150 151 public void addAll (IntVLA array) { 152 addAll(array.items, 0, array.size); 153 } 154 155 public void addAll (IntVLA array, int offset, int length) { 156 if (offset + length > array.size) 157 throw new IllegalArgumentException("offset + length must be <= size: " + offset + " + " + length + " <= " + array.size); 158 addAll(array.items, offset, length); 159 } 160 161 public void addAll (int... array) { 162 addAll(array, 0, array.length); 163 } 164 165 public void addAll (int[] array, int offset, int length) { 166 ensureCapacity(length); 167 for (int i = offset, n = i + length; i < n; i++) 168 add(array[i]); 169 } 170 171 public void addAll (short[] array) { 172 addAll(array, 0, array.length); 173 } 174 175 public void addAll (short[] array, int offset, int length) { 176 ensureCapacity(length); 177 for (int i = offset, n = i + length; i < n; i++) 178 add(array[i]); 179 } 180 181 public void addAll (IntSet set) { 182 ensureCapacity(set.size); 183 if (set.hasZeroValue) add(0); 184 int[] keyTable = set.keyTable; 185 for (int i = 0, n = keyTable.length; i < n; i++) { 186 int key = keyTable[i]; 187 if (key != 0) add(key); 188 } 189 } 190 191 /** Skips checks for existing keys, doesn't increment size, doesn't need to handle key 0. */ 192 private void addResize (int key) { 193 int[] keyTable = this.keyTable; 194 for (int i = place(key);; i = (i + 1) & mask) { 195 if (keyTable[i] == 0) { 196 keyTable[i] = key; 197 return; 198 } 199 } 200 } 201 202 /** Returns true if the key was removed. */ 203 public boolean remove (int key) { 204 if (key == 0) { 205 if (!hasZeroValue) return false; 206 hasZeroValue = false; 207 size--; 208 return true; 209 } 210 211 int i = locateKey(key); 212 if (i < 0) return false; 213 int[] keyTable = this.keyTable; 214 int mask = this.mask, next = i + 1 & mask; 215 while ((key = keyTable[next]) != 0) { 216 int placement = place(key); 217 if ((next - placement & mask) > (i - placement & mask)) { 218 keyTable[i] = key; 219 i = next; 220 } 221 next = next + 1 & mask; 222 } 223 keyTable[i] = 0; 224 size--; 225 return true; 226 } 227 228 /** Returns true if the set has one or more items. */ 229 public boolean notEmpty () { 230 return size > 0; 231 } 232 233 /** Returns true if the set is empty. */ 234 public boolean isEmpty () { 235 return size == 0; 236 } 237 238 /** Reduces the size of the backing arrays to be the specified capacity / loadFactor, or less. If the capacity is already less, 239 * nothing is done. If the set contains more items than the specified capacity, the next highest power of two capacity is used 240 * instead. */ 241 public void shrink (int maximumCapacity) { 242 if (maximumCapacity < 0) throw new IllegalArgumentException("maximumCapacity must be >= 0: " + maximumCapacity); 243 int tableSize = tableSize(maximumCapacity, loadFactor); 244 if (keyTable.length > tableSize) resize(tableSize); 245 } 246 247 /** Clears the set and reduces the size of the backing arrays to be the specified capacity / loadFactor, if they are larger. */ 248 public void clear (int maximumCapacity) { 249 int tableSize = tableSize(maximumCapacity, loadFactor); 250 if (keyTable.length <= tableSize) { 251 clear(); 252 return; 253 } 254 size = 0; 255 hasZeroValue = false; 256 resize(tableSize); 257 } 258 259 public void clear () { 260 if (size == 0) return; 261 size = 0; 262 Arrays.fill(keyTable, 0); 263 hasZeroValue = false; 264 } 265 266 public boolean contains (int key) { 267 if (key == 0) return hasZeroValue; 268 return locateKey(key) >= 0; 269 } 270 271 public int first () { 272 if (hasZeroValue) return 0; 273 int[] keyTable = this.keyTable; 274 for (int i = 0, n = keyTable.length; i < n; i++) 275 if (keyTable[i] != 0) return keyTable[i]; 276 throw new IllegalStateException("IntSet is empty."); 277 } 278 /** 279 * Gets a random int from this IntSet, using the given {@link IRNG} to generate random values. 280 * If this IntSet is empty, throws an UnsupportedOperationException. This method operates in linear time, unlike 281 * the random item retrieval methods in {@link OrderedSet} and {@link OrderedMap}, which take constant time. 282 * @param rng an {@link IRNG}, such as {@link RNG} or {@link GWTRNG} 283 * @return a random int from this IntSet 284 */ 285 public int random(IRNG rng) { 286 if (size <= 0) { 287 throw new UnsupportedOperationException("IntSet cannot be empty when getting a random element"); 288 } 289 int n = rng.nextInt(size); 290 int i = 0; 291 IntSetIterator isi = iterator(); 292 while (n-- >= 0 && isi.hasNext) 293 i = isi.next(); 294 isi.reset(); 295 return i; 296 } 297 298 /** Increases the size of the backing array to accommodate the specified number of additional items / loadFactor. Useful before 299 * adding many items to avoid multiple backing array resizes. */ 300 public void ensureCapacity (int additionalCapacity) { 301 int tableSize = tableSize(size + additionalCapacity, loadFactor); 302 if (keyTable.length < tableSize) resize(tableSize); 303 } 304 305 private void resize (int newSize) { 306 int oldCapacity = keyTable.length; 307 threshold = (int)(newSize * loadFactor); 308 mask = newSize - 1; 309 shift = Long.numberOfLeadingZeros(mask); 310 311 int[] oldKeyTable = keyTable; 312 313 keyTable = new int[newSize]; 314 315 if (size > 0) { 316 for (int i = 0; i < oldCapacity; i++) { 317 int key = oldKeyTable[i]; 318 if (key != 0) addResize(key); 319 } 320 } 321 } 322 323 public int hashCode () { 324 int h = size; 325 int[] keyTable = this.keyTable; 326 for (int i = 0, n = keyTable.length; i < n; i++) { 327 int key = keyTable[i]; 328 if (key != 0) h += key; 329 } 330 return h; 331 } 332 333 public boolean equals (Object obj) { 334 if (!(obj instanceof IntSet)) return false; 335 IntSet other = (IntSet)obj; 336 if (other.size != size) return false; 337 if (other.hasZeroValue != hasZeroValue) return false; 338 int[] keyTable = this.keyTable; 339 for (int i = 0, n = keyTable.length; i < n; i++) 340 if (keyTable[i] != 0 && !other.contains(keyTable[i])) return false; 341 return true; 342 } 343 344 public String toString () { 345 if (size == 0) return "[]"; 346 java.lang.StringBuilder buffer = new java.lang.StringBuilder(32); 347 buffer.append('['); 348 int[] keyTable = this.keyTable; 349 int i = keyTable.length; 350 if (hasZeroValue) 351 buffer.append("0"); 352 else { 353 while (i-- > 0) { 354 int key = keyTable[i]; 355 if (key == 0) continue; 356 buffer.append(key); 357 break; 358 } 359 } 360 while (i-- > 0) { 361 int key = keyTable[i]; 362 if (key == 0) continue; 363 buffer.append(", "); 364 buffer.append(key); 365 } 366 buffer.append(']'); 367 return buffer.toString(); 368 } 369 370 /** Returns an iterator for the keys in the set. Remove is supported. 371 * <p> 372 * The same iterator instance is returned each time this method is called. 373 * Use the {@link IntSetIterator} constructor for nested or multithreaded iteration. */ 374 public IntSetIterator iterator () { 375 if (iterator1 == null) { 376 iterator1 = new IntSetIterator(this); 377 iterator2 = new IntSetIterator(this); 378 } 379 if (!iterator1.valid) { 380 iterator1.reset(); 381 iterator1.valid = true; 382 iterator2.valid = false; 383 return iterator1; 384 } 385 iterator2.reset(); 386 iterator2.valid = true; 387 iterator1.valid = false; 388 return iterator2; 389 } 390 391 static public IntSet with (int... array) { 392 IntSet set = new IntSet(array.length); 393 set.addAll(array); 394 return set; 395 } 396 397 static public IntSet with (IntVLA array) { 398 IntSet set = new IntSet(array.size); 399 set.addAll(array); 400 return set; 401 } 402 403 /** 404 * Allocates a new in array and fills it with the contents of this IntSet. 405 * @return a new int array containing all the int items of this IntSet. 406 */ 407 public int[] toArray(){ 408 int[] array = new int[size]; 409 IntSetIterator it = iterator(); 410 it.reset(); 411 int i = 0; 412 while (it.hasNext) 413 array[i++] = it.next(); 414 return array; 415 } 416 417 /** 418 * Fills an existing (non-null) int array with as many items from this IntSet as will fit in the array, until either 419 * the array is full or the IntSet has no more items to offer. 420 * @param array a non-null int array; can have a length that is the same as, more than or less than {@link #size} 421 * @return {@code array}, after adding items from this 422 */ 423 public int[] toArray(int[] array){ 424 IntSetIterator it = iterator(); 425 it.reset(); 426 int i = 0; 427 while (it.hasNext && i < array.length) 428 array[i++] = it.next(); 429 return array; 430 } 431 432 /** 433 * Appends to an existing (non-null) {@link IntVLA} with all the int items in this IntSet. 434 * @param array a non-null IntVLA; no items will be removed, and the contents of this will be added at the end 435 * @return {@code array}, after adding items from this 436 */ 437 public IntVLA appendInto(IntVLA array){ 438 IntSetIterator it = iterator(); 439 it.reset(); 440 while (it.hasNext) 441 array.add(it.next()); 442 return array; 443 } 444 445 static public class IntSetIterator { 446 static private final int INDEX_ILLEGAL = -2, INDEX_ZERO = -1; 447 448 public boolean hasNext; 449 450 final IntSet set; 451 int nextIndex, currentIndex; 452 boolean valid = true; 453 454 public IntSetIterator (IntSet set) { 455 this.set = set; 456 reset(); 457 } 458 459 public void reset () { 460 currentIndex = INDEX_ILLEGAL; 461 nextIndex = INDEX_ZERO; 462 if (set.hasZeroValue) 463 hasNext = true; 464 else 465 findNextIndex(); 466 } 467 468 void findNextIndex () { 469 int[] keyTable = set.keyTable; 470 for (int n = keyTable.length; ++nextIndex < n;) { 471 if (keyTable[nextIndex] != 0) { 472 hasNext = true; 473 return; 474 } 475 } 476 hasNext = false; 477 } 478 479 public void remove () { 480 int i = currentIndex; 481 if (i == INDEX_ZERO && set.hasZeroValue) { 482 set.hasZeroValue = false; 483 } else if (i < 0) { 484 throw new IllegalStateException("next must be called before remove."); 485 } else { 486 int[] keyTable = set.keyTable; 487 int mask = set.mask, next = i + 1 & mask, key; 488 while ((key = keyTable[next]) != 0) { 489 int placement = set.place(key); 490 if ((next - placement & mask) > (i - placement & mask)) { 491 keyTable[i] = key; 492 i = next; 493 } 494 next = next + 1 & mask; 495 } 496 keyTable[i] = 0; 497 if (i != currentIndex) --nextIndex; 498 } 499 currentIndex = INDEX_ILLEGAL; 500 set.size--; 501 } 502 503 public int next () { 504 if (!hasNext) throw new NoSuchElementException(); 505 if (!valid) throw new IllegalStateException("#iterator() cannot be used nested."); 506 int key = nextIndex == INDEX_ZERO ? 0 : set.keyTable[nextIndex]; 507 currentIndex = nextIndex; 508 findNextIndex(); 509 return key; 510 } 511 512 /** Returns a new array containing the remaining keys. */ 513 public IntVLA toArray () { 514 IntVLA array = new IntVLA(set.size); 515 while (hasNext) 516 array.add(next()); 517 return array; 518 } 519 public IntVLA toArray (IntVLA array) { 520 while (hasNext) 521 array.add(next()); 522 return array; 523 } 524 } 525 526 static int tableSize (int capacity, float loadFactor) { 527 if (capacity < 0) throw new IllegalArgumentException("capacity must be >= 0: " + capacity); 528 int tableSize = HashCommon.nextPowerOfTwo(Math.max(2, (int)Math.ceil(capacity / loadFactor))); 529 if (tableSize > 1 << 30) throw new IllegalArgumentException("The required capacity is too large: " + capacity); 530 return tableSize; 531 } 532}