001package squidpony.squidmath;
002
003import regexodus.Category;
004
005import java.io.Serializable;
006
007import static regexodus.Category.caseFold;
008import static squidpony.squidmath.CrossHash.Water.*;
009
010/**
011 * Additional implementations of the {@link CrossHash.IHasher} interface for more specialized uses, like for use in an
012 * OrderedSet or OrderedMap with String keys that should use case-insensitive equality/hashing.
013 * Created by Tommy Ettinger on 4/15/2017.
014 */
015public class Hashers {
016    private static class CaseInsensitiveStringHasher implements CrossHash.IHasher, Serializable {
017        private static final long serialVersionUID = 1L;
018
019        CaseInsensitiveStringHasher() {
020        }
021
022        @Override
023        public int hash(final Object data0) {
024            if(data0 == null)
025                return 0;
026            if(!(data0 instanceof CharSequence))
027                return data0.hashCode();
028            CharSequence data = (CharSequence)data0;
029            long seed = -260224914646652572L;//b1 ^ b1 >>> 41 ^ b1 << 53;
030            final int len = data.length();
031            for (int i = 3; i < len; i+=4) {
032                seed = mum(
033                        mum(caseFold(data.charAt(i-3)) ^ b1, caseFold(data.charAt(i-2)) ^ b2) + seed,
034                        mum(caseFold(data.charAt(i-1)) ^ b3, caseFold(data.charAt(i  )) ^ b4));
035            }
036            switch (len & 3) {
037                case 0: seed = mum(b1 ^ seed, b4 + seed); break;
038                case 1: seed = mum(seed ^ b3, b4 ^ caseFold(data.charAt(len-1))); break;
039                case 2: seed = mum(seed ^ caseFold(data.charAt(len-2)), b3 ^ caseFold(data.charAt(len-1))); break;
040                case 3: seed = mum(seed ^ caseFold(data.charAt(len-3)) ^ caseFold(data.charAt(len-2)) << 16, b1 ^ caseFold(data.charAt(len-1))); break;
041            }
042            return (int) mum(seed ^ seed << 16, len ^ b0);
043        }
044
045        @Override
046        public boolean areEqual(Object left, Object right) {
047            if(left == right)
048                return true;
049            if(!(left instanceof CharSequence && right instanceof CharSequence))
050                return false;
051            CharSequence l = (CharSequence)left, r = (CharSequence)right;
052            int llen = l.length(), rlen = r.length();
053            if(llen != rlen)
054                return false;
055            for (int i = 0; i < llen; i++) {
056                if(caseFold(l.charAt(i)) != caseFold(r.charAt(i)))
057                    return false;
058            }
059            return true;
060        }
061    }
062
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, using case-insensitive comparison
     * in a cross-platform way.
     * <br>
     * Every char is passed through {@code Category.caseFold} before it is hashed or compared. Two CharSequences are
     * considered equal when they have the same length and their case-folded chars match position by position. A
     * null item hashes to 0, and an item that is not a CharSequence is hashed with its own {@code hashCode()} and is
     * only considered equal to itself (by reference).
     */
    public static final CrossHash.IHasher caseInsensitiveStringHasher = new CaseInsensitiveStringHasher();
068
069    private static class CategoryOnlyStringHasher implements CrossHash.IHasher, Serializable {
070        private static final long serialVersionUID = 1L;
071
072        public Category category;
073        CategoryOnlyStringHasher(Category category) {
074            this.category = category;
075        }
076
077        @Override
078        public int hash(final Object data) {
079            if(data == null)
080                return 0;
081            if(!(data instanceof CharSequence))
082                return data.hashCode();
083            CharSequence data2 = (CharSequence)data;
084            long result = 0x1A976FDF6BF60B8EL, z = 0x60642E2A34326F15L;
085            final int len = data2.length();
086            char c;
087            for (int i = 0; i < len; i++) {
088                if(category.contains(c = data2.charAt(i))) {
089                    result ^= (z += (c ^ 0xC6BC279692B5CC85L) * 0x6C8E9CF570932BABL);
090                    result = (result << 54 | result >>> 10);
091                }
092            }
093            result += (z ^ z >>> 26) * 0x632BE59BD9B4E019L;
094            result = (result ^ result >>> 33) * 0xFF51AFD7ED558CCDL;
095            return (int) ((result ^ result >>> 33) * 0xC4CEB9FE1A85EC53L);
096        }
097
098        @Override
099        public boolean areEqual(Object left, Object right) {
100            if(left == right)
101                return true;
102            if(!(left instanceof CharSequence && right instanceof CharSequence))
103                return false;
104            CharSequence l = (CharSequence)left, r = (CharSequence)right;
105            int llen = l.length(), rlen = r.length();
106            char c1, c2;
107            for (int i = 0, j = 0; i < llen && j < rlen;) {
108                while (!category.contains(c1 = l.charAt(i++)))
109                {}
110                while (!category.contains(c2 = r.charAt(j++)))
111                {}
112                if(c1 != c2)
113                    return false;
114            }
115            return true;
116        }
117    }
118    private static class NoCategoryStringHasher implements CrossHash.IHasher, Serializable {
119        private static final long serialVersionUID = 1L;
120
121        public Category category;
122        NoCategoryStringHasher(Category category) {
123            this.category = category;
124        }
125
126        @Override
127        public int hash(final Object data) {
128            if(data == null)
129                return 0;
130            if(!(data instanceof CharSequence))
131                return data.hashCode();
132            CharSequence data2 = (CharSequence)data;
133            long result = 0x1A976FDF6BF60B8EL, z = 0x60642E2A34326F15L;
134            final int len = data2.length();
135            char c;
136            for (int i = 0; i < len; i++) {
137                if(!category.contains(c = data2.charAt(i))) {
138                    result ^= (z += (c ^ 0xC6BC279692B5CC85L) * 0x6C8E9CF570932BABL);
139                    result = (result << 54 | result >>> 10);
140                }
141            }
142            result += (z ^ z >>> 26) * 0x632BE59BD9B4E019L;
143            result = (result ^ result >>> 33) * 0xFF51AFD7ED558CCDL;
144            return (int) ((result ^ result >>> 33) * 0xC4CEB9FE1A85EC53L);
145        }
146
147        @Override
148        public boolean areEqual(Object left, Object right) {
149            if(left == right)
150                return true;
151            if(!(left instanceof CharSequence && right instanceof CharSequence))
152                return false;
153            CharSequence l = (CharSequence)left, r = (CharSequence)right;
154            int llen = l.length(), rlen = r.length();
155            char c1, c2;
156            for (int i = 0, j = 0; i < llen && j < rlen;) {
157                while (category.contains(c1 = l.charAt(i++)))
158                {}
159                while (category.contains(c2 = r.charAt(j++)))
160                {}
161                if(c1 != c2)
162                    return false;
163            }
164            return true;
165        }
166    }
167
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but only considers letters, and
     * works in a cross-platform way. Letters here are the characters in the Unicode category "L", which includes A-Z,
     * a-z, and most characters used in most non-English languages (katakana glyphs from Japanese count as letters,
     * for instance). Chars that are not in {@link Category#L} are skipped when hashing and when comparing.
     */
    public static final CrossHash.IHasher letterOnlyStringHasher = new CategoryOnlyStringHasher(Category.L);
174
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but only considers chars that
     * are valid components of Java identifiers, and works in a cross-platform way. It does not check that the Strings
     * are valid identifiers; it considers only letters, digits, currency symbols, underscores (and related
     * underscore-like characters), and a few other types of glyph, ignoring whitespace and most punctuation marks.
     * Chars that are not in {@link Category#Identifier} are skipped when hashing and when comparing.
     */
    public static final CrossHash.IHasher identifierOnlyStringHasher = new CategoryOnlyStringHasher(Category.Identifier);
182
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider whitespace
     * (including space, newline, carriage return, tab, and so on), and works in a cross-platform way. Chars that are
     * in {@link Category#Space} are skipped when hashing and when comparing.
     */
    public static final CrossHash.IHasher noSpaceStringHasher = new NoCategoryStringHasher(Category.Space);
188
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider any number
     * glyphs (Unicode category "N", including 0-9, but also various numbers in other languages, such as the dedicated
     * Roman numeral characters), and works in a cross-platform way. Chars that are in {@link Category#N} are skipped
     * when hashing and when comparing.
     */
    public static final CrossHash.IHasher noNumberStringHasher = new NoCategoryStringHasher(Category.N);
195
    /**
     * Hashes and equality-checks CharSequences, such as Strings and StringBuilders, but does not consider letters, and
     * works in a cross-platform way. Letters here are the characters in the Unicode category "L", which includes A-Z,
     * a-z, and most characters used in most non-English languages (katakana glyphs from Japanese count as letters,
     * for instance). Chars that are in {@link Category#L} are skipped when hashing and when comparing.
     */
    public static final CrossHash.IHasher noLetterStringHasher = new NoCategoryStringHasher(Category.L);
202
203
204}