package squidpony.squidmath;

import regexodus.Category;

import java.io.Serializable;

import static regexodus.Category.caseFold;
import static squidpony.squidmath.CrossHash.Water.*;

/**
 * Additional implementations of the {@link CrossHash.IHasher} interface for more specialized uses, like for use in an
 * OrderedSet or OrderedMap with String keys that should use case-insensitive equality/hashing.
 * Created by Tommy Ettinger on 4/15/2017.
 */
public class Hashers {

    /**
     * Hashes the chars of {@code data} that pass a category filter, in order, ignoring all other chars.
     * Shared by {@link CategoryOnlyStringHasher} and {@link NoCategoryStringHasher} so both use the same mixing steps.
     *
     * @param data        the non-null CharSequence to hash
     * @param category    the category used to decide which chars are considered
     * @param keepMembers if true, only chars contained in {@code category} are hashed; if false, only chars that are
     *                    not contained in {@code category} are hashed
     * @return a 32-bit hash of the considered chars
     */
    private static int hashFiltered(final CharSequence data, final Category category, final boolean keepMembers) {
        long result = 0x1A976FDF6BF60B8EL, z = 0x60642E2A34326F15L;
        final int len = data.length();
        char c;
        for (int i = 0; i < len; i++) {
            c = data.charAt(i);
            if (category.contains(c) == keepMembers) {
                result ^= (z += (c ^ 0xC6BC279692B5CC85L) * 0x6C8E9CF570932BABL);
                // 64-bit rotation (left by 54, equivalently right by 10)
                result = (result << 54 | result >>> 10);
            }
        }
        result += (z ^ z >>> 26) * 0x632BE59BD9B4E019L;
        result = (result ^ result >>> 33) * 0xFF51AFD7ED558CCDL;
        return (int) ((result ^ result >>> 33) * 0xC4CEB9FE1A85EC53L);
    }

    /**
     * Checks whether two CharSequences contain the same considered chars in the same order, where "considered" is
     * decided by the same filter that {@link #hashFiltered(CharSequence, Category, boolean)} uses. Ignored chars may
     * appear anywhere in either sequence, including at the end, without affecting the result.
     *
     * @param left        the first non-null CharSequence
     * @param right       the second non-null CharSequence
     * @param category    the category used to decide which chars are considered
     * @param keepMembers if true, only chars contained in {@code category} are compared; if false, only chars that
     *                    are not contained in {@code category} are compared
     * @return true if both sequences yield the same considered chars in the same order, false otherwise
     */
    private static boolean equalFiltered(final CharSequence left, final CharSequence right,
                                         final Category category, final boolean keepMembers) {
        final int llen = left.length(), rlen = right.length();
        int i = 0, j = 0;
        while (true) {
            // Skip ignored chars, never reading past the end of either sequence.
            while (i < llen && category.contains(left.charAt(i)) != keepMembers)
                i++;
            while (j < rlen && category.contains(right.charAt(j)) != keepMembers)
                j++;
            // Once either side has no considered chars left, they match only if the other side has none left either.
            if (i >= llen || j >= rlen)
                return i >= llen && j >= rlen;
            if (left.charAt(i++) != right.charAt(j++))
                return false;
        }
    }

    /**
     * An IHasher that case-folds every char (using {@link Category#caseFold(char)}) before hashing or comparing, so
     * CharSequences that differ only by case are treated as the same key.
     */
    private static class CaseInsensitiveStringHasher implements CrossHash.IHasher, Serializable {
        private static final long serialVersionUID = 1L;

        CaseInsensitiveStringHasher() {
        }

        /**
         * Hashes {@code data0} using its case-folded chars, consumed four at a time, with the leftover zero to three
         * chars mixed in afterwards.
         *
         * @param data0 the object to hash; may be null
         * @return 0 for null, {@code data0.hashCode()} for anything that is not a CharSequence, otherwise a hash of
         * the case-folded chars
         */
        @Override
        public int hash(final Object data0) {
            if (data0 == null)
                return 0;
            if (!(data0 instanceof CharSequence))
                return data0.hashCode();
            final CharSequence data = (CharSequence) data0;
            long seed = -260224914646652572L;//b1 ^ b1 >>> 41 ^ b1 << 53;
            final int len = data.length();
            // Main loop: each pass consumes the four chars ending at index i.
            for (int i = 3; i < len; i += 4) {
                seed = mum(
                        mum(caseFold(data.charAt(i - 3)) ^ b1, caseFold(data.charAt(i - 2)) ^ b2) + seed,
                        mum(caseFold(data.charAt(i - 1)) ^ b3, caseFold(data.charAt(i)) ^ b4));
            }
            // Tail: mix in the 0-3 chars the main loop did not reach.
            switch (len & 3) {
                case 0:
                    seed = mum(b1 ^ seed, b4 + seed);
                    break;
                case 1:
                    seed = mum(seed ^ b3, b4 ^ caseFold(data.charAt(len - 1)));
                    break;
                case 2:
                    seed = mum(seed ^ caseFold(data.charAt(len - 2)), b3 ^ caseFold(data.charAt(len - 1)));
                    break;
                case 3:
                    seed = mum(seed ^ caseFold(data.charAt(len - 3)) ^ caseFold(data.charAt(len - 2)) << 16, b1 ^ caseFold(data.charAt(len - 1)));
                    break;
            }
            return (int) mum(seed ^ seed << 16, len ^ b0);
        }

        /**
         * Compares two objects as CharSequences, char by char, after case-folding each char.
         *
         * @param left  the first object; may be null
         * @param right the second object; may be null
         * @return true if both are the same reference, or if both are CharSequences of equal length whose chars match
         * after case-folding; false otherwise (including when either is not a CharSequence)
         */
        @Override
        public boolean areEqual(Object left, Object right) {
            if (left == right)
                return true;
            if (!(left instanceof CharSequence && right instanceof CharSequence))
                return false;
            final CharSequence l = (CharSequence) left, r = (CharSequence) right;
            final int llen = l.length();
            if (llen != r.length())
                return false;
            for (int i = 0; i < llen; i++) {
                if (caseFold(l.charAt(i)) != caseFold(r.charAt(i)))
                    return false;
            }
            return true;
        }
    }

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, using case-insensitive comparison
     * in a cross-platform way.
     */
    public static final CrossHash.IHasher caseInsensitiveStringHasher = new CaseInsensitiveStringHasher();

    /**
     * An IHasher that only considers the chars that are contained in a given {@link Category}; every other char is
     * skipped by both {@link #hash(Object)} and {@link #areEqual(Object, Object)}.
     */
    private static class CategoryOnlyStringHasher implements CrossHash.IHasher, Serializable {
        private static final long serialVersionUID = 1L;

        public Category category;

        CategoryOnlyStringHasher(Category category) {
            this.category = category;
        }

        /**
         * Hashes only the chars of {@code data} that are contained in {@link #category}.
         *
         * @param data the object to hash; may be null
         * @return 0 for null, {@code data.hashCode()} for anything that is not a CharSequence, otherwise a hash of
         * the chars that are in the category
         */
        @Override
        public int hash(final Object data) {
            if (data == null)
                return 0;
            if (!(data instanceof CharSequence))
                return data.hashCode();
            return hashFiltered((CharSequence) data, category, true);
        }

        /**
         * Compares only the chars of each CharSequence that are contained in {@link #category}.
         *
         * @param left  the first object; may be null
         * @param right the second object; may be null
         * @return true if both are the same reference, or if both are CharSequences whose in-category chars are the
         * same and in the same order; false otherwise (including when either is not a CharSequence)
         */
        @Override
        public boolean areEqual(Object left, Object right) {
            if (left == right)
                return true;
            if (!(left instanceof CharSequence && right instanceof CharSequence))
                return false;
            return equalFiltered((CharSequence) left, (CharSequence) right, category, true);
        }
    }

    /**
     * An IHasher that ignores the chars that are contained in a given {@link Category}; only the remaining chars are
     * used by {@link #hash(Object)} and {@link #areEqual(Object, Object)}.
     */
    private static class NoCategoryStringHasher implements CrossHash.IHasher, Serializable {
        private static final long serialVersionUID = 1L;

        public Category category;

        NoCategoryStringHasher(Category category) {
            this.category = category;
        }

        /**
         * Hashes only the chars of {@code data} that are not contained in {@link #category}.
         *
         * @param data the object to hash; may be null
         * @return 0 for null, {@code data.hashCode()} for anything that is not a CharSequence, otherwise a hash of
         * the chars that are outside the category
         */
        @Override
        public int hash(final Object data) {
            if (data == null)
                return 0;
            if (!(data instanceof CharSequence))
                return data.hashCode();
            return hashFiltered((CharSequence) data, category, false);
        }

        /**
         * Compares only the chars of each CharSequence that are not contained in {@link #category}.
         *
         * @param left  the first object; may be null
         * @param right the second object; may be null
         * @return true if both are the same reference, or if both are CharSequences whose out-of-category chars are
         * the same and in the same order; false otherwise (including when either is not a CharSequence)
         */
        @Override
        public boolean areEqual(Object left, Object right) {
            if (left == right)
                return true;
            if (!(left instanceof CharSequence && right instanceof CharSequence))
                return false;
            return equalFiltered((CharSequence) left, (CharSequence) right, category, false);
        }
    }

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but only considers letters (that
     * is, characters that are in the Unicode category "L", including A-Z, a-z, and most characters used in most
     * non-English languages (katakana glyphs from Japanese count as letters, for instance)), and works in a
     * cross-platform way.
     */
    public static final CrossHash.IHasher letterOnlyStringHasher = new CategoryOnlyStringHasher(Category.L);

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but only considers chars that
     * are valid components of Java identifiers (it does not check that the Strings are valid identifiers, but considers
     * only letters, digits, currency symbols, underscores (and related underscore-like characters), and a few other
     * types of glyph, ignoring whitespace and most punctuation marks), and works in a cross-platform way.
     */
    public static final CrossHash.IHasher identifierOnlyStringHasher = new CategoryOnlyStringHasher(Category.Identifier);

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider whitespace
     * (including space, newline, carriage return, tab, and so on), and works in a cross-platform way.
     */
    public static final CrossHash.IHasher noSpaceStringHasher = new NoCategoryStringHasher(Category.Space);

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider any number
     * glyphs (Unicode category "N", including 0-9, but also various numbers in other languages, such as the dedicated
     * Roman numeral characters), and works in a cross-platform way.
     */
    public static final CrossHash.IHasher noNumberStringHasher = new NoCategoryStringHasher(Category.N);

    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider letters (that
     * is, characters that are in the Unicode category "L", including A-Z, a-z, and most characters used in most
     * non-English languages (katakana glyphs from Japanese count as letters, for instance)), and works in a
     * cross-platform way.
     */
    public static final CrossHash.IHasher noLetterStringHasher = new NoCategoryStringHasher(Category.L);


}