package squidpony;

import regexodus.*;
import squidpony.squidmath.NumberTools;
import squidpony.squidmath.RNG;
import squidpony.squidmath.StatefulRandomness;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * Class that builds up a dictionary of words in an English-language source text to words generated by a
 * {@link FakeLanguageGen}, and can translate a source text to a similarly-punctuated, similarly-capitalized fake text;
 * it will try to use variants on the translation of the same root word when it encounters conjugations of that root
 * word or that root word with common English prefixes/suffixes. Performs basic stemming to separate a root word from
 * prefixes, suffixes, and conjugation changes, then uses a phonetic hash of each such separate section to determine the
 * RNG seed that FakeLanguageGen will use, so the translation is not random (similar-sounding root words with similar
 * length will tend to be similar in the results as well). Can cipher an English text and generate a text with
 * FakeLanguageGen, but also decipher such a generated text with a fully-complete, partially-complete, or
 * partially-incorrect vocabulary.
 * <br>
 * This defaults to caching source-language words to their generated-language word translations in the field table, as
 * well as the reverse translation in reverse. This can be changed to reduce memory usage for large vocabularies with
 * {@code setCacheLevel()}, where it starts at 2 (writing to table and reverse), and can be lowered to 1 (writing to
 * table only) if you don't need reverse to decipher a language easily, or to 0 (writing to neither) if you expect that
 * memory will be at a premium and don't mind re-generating the same word each time it occurs in a source text. If
 * cacheLevel is 1 or less, then this will not check for overlap between previously-generated words (it won't have an
 * easy way to look up previously-generated ones) and so may be impossible to accurately decipher. As an example, one
 * test of level 1 generated "he" as the translation for both "a" and "at", so every time "a" had been ciphered and then
 * deciphered, the reproduced version said "at" instead. This won't happen by default, but the default instead relies on
 * words being entered as inputs to cipher() or lookup() in the same order. If words are entered in two different orders
 * to different runs of the program, they may have different generated results if cacheLevel is 2. One way to handle
 * this is to use cacheLevel 2 and cipher the whole game script, or just the unique words in it (maybe just a large word
 * list, such as <a href="http://wordlist.aspell.net/12dicts/">12dicts</a> ), then serialize the NaturalLanguageCipher
 * for later usage.
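 * <br>
 * A minimal usage sketch; the sentence, variable names, and learned words below are arbitrary examples chosen for
 * this illustration, not taken from the library's tests:
 * <pre>{@code
 * NaturalLanguageCipher cipher = new NaturalLanguageCipher(FakeLanguageGen.GOBLIN);
 * String fake = cipher.cipher("Too many cooks spoil the broth.");
 * // with the default cacheLevel of 2, the reverse map acts as a complete vocabulary
 * String restored = cipher.decipher(fake, cipher.reverse);
 * // a partial vocabulary only translates back the words the player has "learned"
 * HashMap<String, String> learned = new HashMap<>();
 * cipher.learnTranslations(learned, "cooks", "broth");
 * String partlyKnown = cipher.decipher(fake, learned);
 * }</pre>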
 * @author Tommy Ettinger
 * Created by Tommy Ettinger on 5/1/2016.
 */
public class NaturalLanguageCipher implements Serializable{

    private static class SemiRandom implements StatefulRandomness, Serializable{
        private static final long serialVersionUID = 1287835632461186341L;
        public long state;
        SemiRandom()
        {
            state = (long) (Long.MAX_VALUE * (Math.random() * 2.0 - 1.0));
        }
        SemiRandom(long state)
        {
            this.state = state;
        }
        /**
         * Get the current internal state of the StatefulRandomness as a long.
         *
         * @return the current internal state of this object.
         */
        @Override
        public long getState() {
            return state;
        }

        /**
         * Set the current internal state of this StatefulRandomness with a long.
         *
         * @param state a 64-bit long. You should avoid passing 0, even though some implementations can handle that.
         */
        @Override
        public void setState(long state) {
            this.state = state;
        }

        /**
         * Using this method, any algorithm that might use the built-in Java Random
         * can interface with this randomness source.
         *
         * @param bits the number of bits to be returned
         * @return the integer containing the appropriate number of bits
         */
        @Override
        public int next(int bits) {
            return (int) ((state += 0x41041041041041L) & ~(-1L << bits));
        }

        /**
         * Using this method, any algorithm that needs to efficiently generate more
         * than 32 bits of random data can interface with this randomness source.
         * <p>
         * Get a random long between Long.MIN_VALUE and Long.MAX_VALUE (both inclusive).
         *
         * @return a random long between Long.MIN_VALUE and Long.MAX_VALUE (both inclusive)
         */
        @Override
        public long nextLong() {
            return state += 0x41041041041041L;
        }

        public double nextDouble()
        {
            return NumberTools.longBitsToDouble(0x3FFL << 52 | (state += 0x41041041041041L) >>> 12) - 1.0;
        }

        /**
         * Produces a copy of this RandomnessSource that, if next() and/or nextLong() are called on this object and the
         * copy, both will generate the same sequence of random numbers from the point copy() was called. This just
         * needs to copy the state so it isn't shared, usually, and produce a new value with the same exact state.
         *
         * @return a copy of this RandomnessSource
         */
        @Override
        public SemiRandom copy() {
            return new SemiRandom(state);
        }
    }

    private static final long serialVersionUID = 1287835632461186341L;
    /**
     * The FakeLanguageGen this will use to construct words; normally one of the static fields in FakeLanguageGen, a
     * FakeLanguageGen produced by using the {@link FakeLanguageGen#mixAll(Object...)} method of two or more of them, or
     * a random FakeLanguageGen produced by {@link FakeLanguageGen#randomLanguage(long)}. Manually constructing
     * FakeLanguageGen objects isn't easy, and if you decide to do that it's recommended you look at SquidLib's source
     * to see how the existing calls to constructors work.
     */
    public FakeLanguageGen language;
    private SemiRandom rs;
    private RNG rng;

    String pluralSuffix, verbingSuffix, verbedSuffix, verberSuffix, verbationSuffix,
            verbmentSuffix, nounySuffix, nounenSuffix, nounistSuffix, nounismSuffix,
            nounicSuffix, nouniveSuffix, adjectivelySuffix, adjectivestSuffix,
            reverbPrefix, ennounPrefix, preverbPrefix, postverbPrefix,
            proverbPrefix, antiverbPrefix, disnounPrefix;

    private static final long PLURAL = 1L, VERBING = 1L << 1, VERBED = 1L << 2, VERBER = 1L << 3,
            VERBATION = 1L << 4, VERBMENT = 1L << 5, NOUNY = 1L << 6, NOUNEN = 1L << 7, NOUNIST = 1L << 8,
            NOUNISM = 1L << 9, NOUNIC = 1L << 10, NOUNIVE = 1L << 11, ADJECTIVELY = 1L << 12,
            ADJECTIVEST = 1L << 13, REVERB = 1L << 14, PREVERB = 1L << 15, POSTVERB = 1L << 16, ENNOUN = 1L << 17,
            PROVERB = 1L << 18, ANTIVERB = 1L << 19, DISNOUN = 1L << 20;

    /*
    qu->kw
    x->ks
    y->i
    kh->q
    ck->k
    ch->x
    cq->kh
    tx->x
    zh->j
    ge->j
    ew->eu
    eigh->ae
    p[fh]->f
    n([gk])->y$1
    a([bdfjlmnprtvz])e->ae$1
    e([bdjlmnptvz])e->ee$1
    i([bdfjlmnprtvz])e->ai$1
    o([bdfjlmnprtvz])e->oa$1
    u([bdfjlmnprtvz])e->uu$1
    ([bdfgklmnpqrtvwxz])\1+->$1
    ace$->aes
    ece$->ees
    ice$->ais
    oce$->oas
    uce$->uus
    se$->z
    ^[pc]([nts])->$1
    ^fth->t
     */
    private static final Replacer[] preproc = {
            new Replacer(Pattern.compile("([bdfgklmnpqrtvwxz])\\1+"), "$1"),
            new Replacer(Pattern.compile("qu"), "kw", false),
            new Replacer(Pattern.compile("x"), "ks", false),
            new Replacer(Pattern.compile("y"), "i", false),
            new Replacer(Pattern.compile("kh"), "q", false),
            new Replacer(Pattern.compile("ck"), "k", false),
            new Replacer(Pattern.compile("ch"), "x", false),
            new Replacer(Pattern.compile("cq"), "kh", false),
            new Replacer(Pattern.compile("tx"), "x", false),
            new Replacer(Pattern.compile("zh"), "j", false),
            new Replacer(Pattern.compile("ge$"), "j", false),
            new Replacer(Pattern.compile("we$"), "w", false),
            new Replacer(Pattern.compile("ew"), "eu", false),
            new Replacer(Pattern.compile("eigh"), "ae", false),
            new Replacer(Pattern.compile("p[fh]"), "f", false),
            new Replacer(Pattern.compile("nc"), "yk", false),
            new Replacer(Pattern.compile("n([gk])"), "y$1"),
            new Replacer(Pattern.compile("a([bdfjlmnprtvz])e"), "ae$1"),
            new Replacer(Pattern.compile("e([bdjlmnptvz])e"), "ee$1"),
            new Replacer(Pattern.compile("i([bdfjlmnprtz])e"), "ai$1"),
            new Replacer(Pattern.compile("o([bdfjlmnprtvz])e"), "oa$1"),
            new Replacer(Pattern.compile("u([bdfjlmnprtvz])e"), "uu$1"),
            new Replacer(Pattern.compile("ace$"), "aes", false),
            new Replacer(Pattern.compile("ece$"), "ees", false),
            new Replacer(Pattern.compile("ice$"), "ais", false),
            new Replacer(Pattern.compile("oce$"), "oas", false),
            new Replacer(Pattern.compile("uce$"), "uus", false),
            new Replacer(Pattern.compile("se$"), "z", false),
            new Replacer(Pattern.compile("e$"), "", false),
            new Replacer(Pattern.compile("^[pc]([nts])"), "$1"),
            new Replacer(Pattern.compile("^fth"), "t", false),
    }, conjugationProc = { // 17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\2" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])", 17), "$1$2$1$2$3"),
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\3", 17), "$1$2$3$1$3"),
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]{3})" +
                    "(?:[^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)", 17), "$1"),
            new Replacer(Pattern.compile("([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])(?:\\1\\2)+", 17), "$1$2"),
            new Replacer(Pattern.compile("[æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy]([æǽœýÿŷỳy])", 17), "$1"),
            new Replacer(Pattern.compile("q([ùúûüũūŭůűųu])$", 17), "q$1e"),
            new Replacer(Pattern.compile("([ìíîïĩīĭįıi])[ìíîïĩīĭįıi]", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy])[wŵẁẃẅ]$", 17), "$1"),
            new Replacer(Pattern.compile("([ùúûüũūŭůűųu])([òóôõöøōŏőǿo])", 17), "$2$1"),
            new Replacer(Pattern.compile("[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]([æǽœ])", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœ])[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]", 17), "$1"),
            new Replacer(Pattern.compile("([wŵẁẃẅ])[wŵẁẃẅ]", 17), "$1"),
            new Replacer(Pattern.compile("q{2,}", 17), "q")
    };

    static final long[] bigrams = {
//a
            5, 22, 20, 22, 21, 22, 22, 5, 11, 20, 22, 4, 22, 22, 20, 22, 4, 4, 22, 22, 5, 22, 5, 22, 20, 22, 8,
//b
            52, 52, 52, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
//c
            58, 58, 58, 58, 33, 58, 39, 58, 32, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//d
            60, 60, 60, 60, 60, 60, 60, 61, 60, 39, 60, 60, 60, 60, 60, 60, 60, 60, 60, 63, 60, 60, 60, 60, 60, 60, 60,
//e
            19, 6, 18, 6, 19, 6, 6, 7, 19, 6, 6, 6, 6, 6, 18, 6, 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 0,
//f
            42, 42, 42, 42, 42, 42, 42, 43, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
//g
            56, 56, 56, 56, 56, 56, 56, 41, 56, 56, 56, 56, 56, 51, 56, 56, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 56,
//h
            24, 53, 59, 61, 24, 43, 57, 24, 24, 39, 59, 31, 49, 51, 24, 55, 47, 29, 33, 63, 24, 41, 27, 47, 51, 35, 0,
//i
            16, 16, 16, 16, 11, 16, 16, 17, 11, 16, 16, 16, 16, 16, 18, 16, 16, 2, 16, 16, 16, 16, 16, 16, 18, 16, 18,
//j
            38, 38, 38, 38, 38, 38, 38, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
//k
            58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//l
            30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
//m
            48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
//n
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 50, 50,
//o
            14, 4, 4, 4, 14, 4, 4, 15, 2, 4, 4, 14, 4, 4, 2, 4, 4, 14, 4, 4, 12, 4, 14, 4, 4, 4, 14,
//p
            54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
//q
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46,
//r
            28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28,
//s
            32, 32, 32, 34, 32, 32, 32, 37, 32, 32, 32, 32, 34, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 34, 34,
//t
            62, 62, 62, 63, 62, 62, 62, 45, 62, 62, 62, 58, 62, 62, 62, 62, 62, 62, 62, 63, 62, 62, 62, 47, 62, 62, 62,
//u
            26, 8, 8, 8, 12, 8, 8, 9, 26, 8, 8, 2, 8, 8, 2, 8, 8, 2, 8, 8, 13, 8, 13, 8, 8, 8, 12,
//v
            40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40,
//w
            26, 53, 59, 61, 26, 43, 57, 24, 26, 39, 59, 31, 49, 51, 26, 55, 47, 29, 35, 63, 24, 41, 27, 47, 51, 35, 0,
//x
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46,
//y
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 46, 50, 51, 50, 50,
//z
            34, 34, 34, 34, 34, 34, 34, 39, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 34, 34, 34, 34, 34, 34, 34, 34,
    };


    // not an OrderedMap because this should never need a random element to be requested
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are in the source language and values
     * are generated by language.
     */
    public HashMap<String, String> table,
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are generated by language and values
     * are in the source language. Can be used as a complete vocabulary when passed to decipher.
     */
    reverse;
    private static final Pattern wordPattern = Pattern.compile("(\\pL+)|(\\pL[\\pL-]*\\pL)");
    private static final Matcher wordMatcher = wordPattern.matcher();

    /**
     * The degree of vocabulary to cache to speed up future searches at the expense of memory usage.
     * <ul>
     * <li>2 will cache source words to generated words in table, and generated to source in reverse.</li>
     * <li>1 will cache source words to generated words in table, and won't write to reverse.</li>
     * <li>0 won't write to table or reverse.</li>
     * </ul>
     * Defaults to 2, writing to both table and reverse.
     */
    public int cacheLevel = 2;

    public long shift;

    /**
     * Constructs a NaturalLanguageCipher that will generate simplified English-like text by default (this uses
     * {@link FakeLanguageGen#SIMPLISH}).
     */
    public NaturalLanguageCipher()
    {
        this(FakeLanguageGen.SIMPLISH);
    }

    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     */
    public NaturalLanguageCipher(FakeLanguageGen language)
    {
        this(language, 0);
    }

    private Pattern[] additionalPrefixChecks = {
            //17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            Pattern.compile("(?:(?:[pрρ][hн])|[fd])[aаαiτιuμυνv]$", 17),
            Pattern.compile("[kкκcсςq][uμυνv]$", 17),
            Pattern.compile("[bъыбвβЪЫБ][iτι][tтτг]$", 17),
            Pattern.compile("[sξζzcсς](?:[hн]?)[iτιyуλγУ]$", 17),
            Pattern.compile("[aаαΛ][nи][aаαΛiτιyуλγУuμυνvoоюσο]*$", 17),
            Pattern.compile("[tтτΓг][iτιyуλγУ]+$", 17),
            Pattern.compile("[cсςkкκq][lι]?[iτιyуλγУ]+$", 17),
            Pattern.compile("[aаαΛ][sξζz]$", 17),
            Pattern.compile("[nиfvν][iτιyуλγУaаαΛ]+$", 17),
            Pattern.compile("[pрρ][eезξεЗΣoоюσοiτιyуλγУuμυνv]+$", 17),
            Pattern.compile("[g][hн]?[aаαΛeезξεЗΣyуλγУ]+$", 17),
            Pattern.compile("[wψшщuμυνv](?:[hн]?)[aаαΛeезξεЗΣoоюσοuμυνv]+$", 17),
    }, additionalSuffixChecks = {
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]*)(?:[nи]+)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]+)(?:[nи]*)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[iτιyуλγУaаαΛ]*)[gj]", 17),
            Pattern.compile("^[nи]..?[Ssξlιζz]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][dtтτΓг]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][kкκcсςq][kкκcсςq]", 17),
            Pattern.compile("^[uμυ]*[mм]", 17),
    };

    private String addPart(String original, int syllables)
    {
        String done;
        Pattern[] checks = null;
        if(original.endsWith("-"))
        {
            checks = additionalPrefixChecks;
        }
        else if(original.startsWith("-"))
        {
            checks = additionalSuffixChecks;
        }
        //syllables <<= 1;
        do {
            done = language.word(rng, false, syllables, checks);
            if(cacheLevel < 2 || ++syllables > 5)
                break;
        }while(reverse.containsKey(done));
        switch (cacheLevel) {
            case 2: reverse.put(done, original);
            case 1: table.put(original, done);
        }
        return done;
    }

    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text, using
     * the specified {@code shift} as a long to modify the generated words from the language's normal results.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     * @param shift any long; this will be used to alter the specific words generated unless it is 0
     */
    public NaturalLanguageCipher(FakeLanguageGen language, long shift)
    {
        rs = new SemiRandom(0xDF58476D1CE4E5B9L + shift);
        rng = new RNG(rs);
        table = new HashMap<>(512, 0.375f);
        reverse = new HashMap<>(512, 0.375f);
        initialize(language, shift);
    }

    /**
     * Changes the language this can cipher, clearing its known translations (if any) and using the given FakeLanguageGen
     * and shift as if given to {@link #NaturalLanguageCipher(FakeLanguageGen, long)}.
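     * <br>
     * For example, to keep one NaturalLanguageCipher around but swap its output style (a sketch; the shift value
     * here is an arbitrary choice):
     * <pre>{@code
     * cipher.initialize(FakeLanguageGen.DEMONIC, 42L);
     * }</pre>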
     * @param language the FakeLanguageGen to change to
     * @param shift any long; this will be used to alter the specific words generated unless it is 0
     * @return this for chaining
     */
    public NaturalLanguageCipher initialize(FakeLanguageGen language, long shift)
    {
        rs.state = 0xDF58476D1CE4E5B9L + shift;
        this.shift = shift;
        this.language = language.copy();
        table.clear();
        reverse.clear();
        pluralSuffix = addPart("-s", 0);
        nounySuffix = addPart("-y", 0);
        nounicSuffix = addPart("-ic", 0);
        nouniveSuffix = addPart("-ive", 0);
        nounistSuffix = addPart("-ist", 0);
        nounismSuffix = addPart("-ism", 1 + (rng.nextSignedInt(3) >> 1));
        nounenSuffix = addPart("-en", 0);
        verbedSuffix = addPart("-ed", 0);
        verberSuffix = addPart("-er", 0);
        verbingSuffix = addPart("-ing", 1);
        verbmentSuffix = addPart("-ment", 0);
        verbationSuffix = addPart("-ation", rng.nextSignedInt(2) + 1);
        adjectivelySuffix = addPart("-ly", 0);
        adjectivestSuffix = addPart("-est", 0);
        reverbPrefix = addPart("re-", 0);
        ennounPrefix = addPart("en-", 0);
        preverbPrefix = addPart("pre-", 0);
        proverbPrefix = addPart("pro-", 0);
        postverbPrefix = addPart("post-", 0);
        antiverbPrefix = addPart("anti-", 2 - (rng.nextSignedInt(3) >> 1));
        disnounPrefix = addPart("dis-", 0);
        table.clear();
        reverse.clear();
        return this;
    }


    /**
     * Copies another NaturalLanguageCipher and constructs this one with the information in the other. Copies the dictionary
     * of known words/prefixes/suffixes/conjugations, as well as the FakeLanguageGen style and everything else.
     * @param other a previously-constructed NaturalLanguageCipher.
     */
    public NaturalLanguageCipher(NaturalLanguageCipher other)
    {
        language = other.language.copy();
        rs = other.rs.copy();
        rng = new RNG(rs);
        table = new HashMap<>(other.table.size(), 0.375f);
        table.putAll(other.table);
        reverse = new HashMap<>(other.reverse.size(), 0.375f);
        reverse.putAll(other.reverse);
        shift = other.shift;
        pluralSuffix = other.pluralSuffix;
        nounySuffix = other.nounySuffix;
        nounicSuffix = other.nounicSuffix;
        nouniveSuffix = other.nouniveSuffix;
        nounistSuffix = other.nounistSuffix;
        nounismSuffix = other.nounismSuffix;
        nounenSuffix = other.nounenSuffix;
        verbedSuffix = other.verbedSuffix;
        verberSuffix = other.verberSuffix;
        verbingSuffix = other.verbingSuffix;
        verbmentSuffix = other.verbmentSuffix;
        verbationSuffix = other.verbationSuffix;
        adjectivelySuffix = other.adjectivelySuffix;
        adjectivestSuffix = other.adjectivestSuffix;
        reverbPrefix = other.reverbPrefix;
        ennounPrefix = other.ennounPrefix;
        preverbPrefix = other.preverbPrefix;
        postverbPrefix = other.postverbPrefix;
        proverbPrefix = other.proverbPrefix;
        antiverbPrefix = other.antiverbPrefix;
        disnounPrefix = other.disnounPrefix;
    }

    /**
     * Gets a phonetic hash of a section of {@code data} between {@code start} inclusive and {@code end} exclusive; this
     * 64-bit hash should be similar for similar-sounding words, rather than wildly different whenever the words differ
     * at all. The algorithm is conceptually related to a locality-sensitive hash, and is inspired by
     * <a href="https://github.com/ticki/eudex">Eudex</a>; like Eudex, the Hamming distance between the hashes of two
     * similar words should be low, even if the values are very different on a number line. The input to this must
     * contain lower-case ASCII letters, since that is all this knows how to read (characters not between 'a' and 'z'
     * are ignored). In NaturalLanguageCipher, the hashes this produces are given as seeds to an
     * intentionally-low-quality RandomnessSource that produces similar results for similar input states, which makes
     * it likely to generate output words that are similar to each other when the input words are similar to each other.
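     * <br>
     * A small illustrative call (the word and variable names are arbitrary, chosen only for this sketch):
     * <pre>{@code
     * char[] letters = "gnome".toCharArray();
     * long hash = NaturalLanguageCipher.phoneticHash64(letters, 0, letters.length);
     * // a related word such as "gnomes" should produce a hash with a low Hamming distance to this one
     * }</pre>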
     * @param data a char array that should contain letters from 'a' to 'z' this can hash
     * @param start the starting position in data to read, inclusive
     * @param end the end position in data to stop reading at, exclusive
     * @return a 64-bit long hash that should have a low Hamming distance to phonetic hashes of similar words
     */
    public static long phoneticHash64(char[] data, int start, int end)
    {
        if(data == null || end <= start || start >= data.length)
            return 0L;
        int current, next, count = 0, used = 0;
        long got, vc = 0, h = 0L;
        boolean vowelStream = false;
        for (int i = start; i < end && count < 10; i++, count++) { // && vc < 7
            current = data[i] - 'a';
            if(current < 0 || current > 25) // skip anything that is not a lower-case ASCII letter
                continue;
            if(i + 1 < end) {
                if((next = data[i + 1] - 'a') < 0 || next > 26)
                    continue;
            }
            else
                next = 26;
            got = bigrams[27 * current + next];
            if(got == 0)
                continue;
            h <<= 6;
            //b <<= 3;
            got = bigrams[27 * current + next];
            i += got & 1L;
            h |= got >>= 1;
            used++;
            //used += 5;
            if(count == 0) {
                vowelStream = got > 0 && got < 12;
            }else if (vowelStream != (got > 0 && got < 12)) {
                vc += vowelStream ? 1 : 0;
                vowelStream = !vowelStream;
            }
            //b |= (got >> 2);
        }
        vc += vowelStream ? 1 : 0;

        if(used > 0 && count > 0) {
            got = h;
            for (; count < 11; count += used) {
                h |= got << (6 * count);
            }
            h &= 0xFFFFFFFFFFFFFFFL; // 60 bits
        }
        /*b &= ~(-1 << (35-Math.min(used, 35)));
        if(used <= 20)
            b ^= b << 8;
        h ^= ((vc & 7L) << 39) | (b << (used + 3));
        */
        vc = Math.max(1L, vc);
        return h | ((vc & 15L) << 60);
    }

    private String conjugate(String data, long mods)
    {
        if(data == null)
            return "";
        StringBuilder sb = new StringBuilder(data);

        if((mods & ENNOUN) != 0)
        {
            sb.insert(0, ennounPrefix);
        }
        if((mods & DISNOUN) != 0)
        {
            sb.insert(0, disnounPrefix);
        }
        if((mods & REVERB) != 0)
        {
            sb.insert(0, reverbPrefix);
        }
        if((mods & ANTIVERB) != 0)
        {
            sb.insert(0, antiverbPrefix);
        }
        if((mods & PROVERB) != 0)
        {
            sb.insert(0, proverbPrefix);
        }
        if((mods & POSTVERB) != 0)
        {
            sb.insert(0, postverbPrefix);
        }
        if((mods & PREVERB) != 0)
        {
            sb.insert(0, preverbPrefix);
        }
        if((mods & NOUNEN) != 0) {
            sb.append(nounenSuffix);
        }
        if((mods & VERBER) != 0) {
            sb.append(verberSuffix);
        }
        if((mods & VERBMENT) != 0) {
            sb.append(verbmentSuffix);
        }
        if((mods & VERBATION) != 0) {
            sb.append(verbationSuffix);
        }
        if((mods & NOUNIVE) != 0) {
            sb.append(nouniveSuffix);
        }
        if((mods & NOUNISM) != 0) {
            sb.append(nounismSuffix);
        }
        if((mods & NOUNIST) != 0) {
            sb.append(nounistSuffix);
        }
        if((mods & NOUNIC) != 0) {
            sb.append(nounicSuffix);
        }
        if((mods & ADJECTIVEST) != 0) {
            sb.append(adjectivestSuffix);
        }
        if((mods & VERBED) != 0) {
            sb.append(verbedSuffix);
        }
        if((mods & VERBING) != 0) {
            sb.append(verbingSuffix);
        }
        if((mods & NOUNY) != 0) {
            sb.append(nounySuffix);
        }
        if((mods & ADJECTIVELY) != 0) {
            sb.append(adjectivelySuffix);
        }
        if((mods & PLURAL) != 0) {
            sb.append(pluralSuffix);
        }
        String done = sb.toString();
        for(int conproc = 0; conproc < conjugationProc.length; conproc++)
        {
            done = conjugationProc[conproc].replace(done);
        }
        return done;
    }
    /**
     * Given a word in the source language (usually English), looks up an existing translation for that word, or if none
     * exists, generates a new word based on the phonetic hash of the source word, any of its stemming information such
     * as prefixes or suffixes, and this NaturalLanguageCipher's FakeLanguageGen.
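     * <br>
     * A minimal sketch of the intended behavior, given some NaturalLanguageCipher named {@code cipher} (the words
     * here are arbitrary examples):
     * <pre>{@code
     * String root = cipher.lookup("dragon");
     * String related = cipher.lookup("dragons"); // usually shares the root of the previous result, plus a suffix
     * }</pre>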
     * @param source a word in the source language
     * @return a word in the fake language
     */
    public String lookup(String source)
    {
        if(source == null || source.isEmpty())
            return "";
        String s2 = source.toLowerCase(), ciphered;
        if(table.containsKey(s2))
            ciphered = table.get(s2);
        else {
            CharSequence altered = FakeLanguageGen.removeAccents(s2);
            for (int i = 0; i < preproc.length; i++) {
                altered = preproc[i].replace(altered);
            }

            char[] sc = ((String)altered).toCharArray(), scO = s2.toCharArray();
            int start = 0, end = sc.length, endO = scO.length;
            long mods = 0;
            /*
            boolean plural = false, verbing = false, verbed = false, verber = false, verbation = false,
                    verbment = false, nouny = false, nounen = false, nounist = false, nounism = false,
                    nounic = false, nounive = false, adjectively = false, adjectivest = false,
                    //prefixes
                    reverb = false, ennoun = false, preverb = false, postverb = false,
                    proverb = false, antiverb = false, disnoun = false;
            */
            if(end >= 4 && endO >= 4 && sc[end-1]=='s' && sc[end-2]!='s') // checking for extra 's' helps singular nouns like "dress" and "princess"
            {
                mods |= PLURAL;
                end--;
                endO--;
                if(scO[endO-1] == 'e')
                {
                    end--;
                    endO--;
                }
            }
            if(end >= 5 && endO >= 5 && sc[end - 2] == 'l' && sc[end-1] == 'y')
            {
                mods |= ADJECTIVELY;
                end -= 2;
                endO -= 2;
            }
            /*
            else if(end >= 4 && endO >= 4 && scO[endO-1] == 'y')
            {
                mods |= NOUNY;
                end--;
                endO--;
            }*/
            if(end >= 5 && endO >= 5 && scO[endO-3] == 'i' && scO[endO-2] == 'n' && scO[endO-1]=='g')
            {
                mods |= VERBING;
                end -= 3;
                endO -= 3;
            }
            if(end >= 4 && endO >= 4 && (scO[endO-3] == 'a' || scO[endO-3] == 'o') && scO[endO-2] == 'd' && scO[endO-1]=='e')
            {
                mods |= VERBED;
                end -= 3;
                endO -= 3;
            }
            else if(end >= 4 && endO >= 4 && scO[endO-2] == 'e' && scO[endO-1] == 'd')
            {
                mods |= VERBED;
                end -= 2;
                endO -= 2;
            }
            else if(end >= 5 && endO >= 5 && sc[end - 3] == 'e' && sc[end - 2] == 's' && sc[end-1] == 't')
            {
                mods |= ADJECTIVEST;
                end -= 3;
                endO -= 3;
            }
            if(end >= 5 && endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'c')
            {
                mods |= NOUNIC;
                end -= 2;
                endO -= 2;
            }
            else if(end >= 6 && endO >= 6 && scO[endO-3] == 'i' && scO[endO-2] == 'v' && scO[endO-1] == 'e') {
                mods |= NOUNIVE;
                end -= 3;
                endO -= 3;
                if (end >= 4 && endO >= 4 && (scO[endO - 2] == 'a' || scO[endO - 2] == 'i') && scO[endO - 1] == 't') {
                    end -= 2;
                    endO -= 2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 't')
            {
                mods |= NOUNIST;
                end -= 3;
                endO -= 3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end -= 2;
                    endO -= 2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 'm')
            {
                mods |= NOUNISM;
                end -= 3;
                endO -= 3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end -= 2;
                    endO -= 2;
                }

            }
            if(end >= 8 && endO >= 8 && (scO[endO - 4] == 't' || scO[endO - 4] == 's' || scO[endO - 4] == 'c') && scO[endO-3] == 'i' && scO[endO-2] == 'o' && scO[endO-1]=='n')
            {
                mods |= VERBATION;
                end -= 4;
                endO -= 4;
            }
            if(end >= 6 && sc[end-4] == 'm' && sc[end-3] == 'e' && sc[end-2] == 'n' && sc[end-1] == 't')
            {
                mods |= VERBMENT;
                end -= 4;
                endO -= 4;
            }
            if(end >= 7 && endO >= 7 && scO[endO-3] == 'i' && scO[endO-2] == 'a' && scO[endO-1]=='n')
            {
                mods |= VERBER;
                end -= 3;
            }
            else if(end >= 4 && endO >= 4 && (sc[end-2] == 'e' || sc[end-2] == 'o') && sc[end-1] == 'r')
            {
                mods |= VERBER;
                end -= 2;
            }
            if(end >= 4 && sc[end-2] == 'e' && sc[end-1]=='n')
            {
                mods |= NOUNEN;
                end -= 2;
            }
            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'e')
            {
                mods |= PREVERB;
                start += 3;
            }
            if(end - start >= 6 && sc[start] == 'p' && sc[start+1] == 'o' && sc[start+2] == 's' && sc[start+3] == 't')
            {
                mods |= POSTVERB;
                start += 4;
            }

            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'o')
            {
                mods |= PROVERB;
                start += 3;
            }
            else {
                if (end - start >= 6 && sc[start] == 'a' && sc[start + 1] == 'n' && sc[start + 2] == 't' && sc[start + 3] == 'i') {
                    mods |= ANTIVERB;
                    start += 4;
                }
                else if (end - start >= 8 && sc[start] == 'c' && sc[start + 1] == 'o' && sc[start + 2] == 'n' && sc[start + 3] == 't' && sc[start + 4] == 'r' && sc[start + 5] == 'a') {
                    mods |= ANTIVERB;
                    start += 6;
                }
            }
            if(end - start >= 4 && sc[start] == 'r' && sc[start+1] == 'e')
            {
                mods |= REVERB;
                start += 2;
            }
            if(end - start >= 5 && sc[start] == 'd' && sc[start+1] == 'i' && sc[start+2] == 's')
            {
                mods |= DISNOUN;
                start += 3;
            }
            if(end - start >= 4 && sc[start] == 'u' && sc[start+1] == 'n')
            {
                mods |= ANTIVERB;
                start += 2;
            }
            if(end - start >= 4 && (sc[start] == 'e' || sc[start] == 'i') && sc[start+1] == 'n')
            {
                mods |= ENNOUN;
                start += 2;
            }
            long h = phoneticHash64(sc, start, end) ^ (shift & 0xFFFFFFFFFFFFFFFL) ^ (shift >>> 14), frustration = 0;
            //System.out.print(source + ":" + ((h >>> 60) & 7) + ":" + StringKit.hex(h) + ", ");
            rs.setState(h);
            do {
                ciphered = conjugate(language.word(rng, false, (int) Math.ceil((h >>> 60) / (0.9 + 0.5 * rng.nextDouble()))), mods);
                if(cacheLevel < 2 || frustration++ > 9)
                    break;
            }while (reverse.containsKey(ciphered));
            switch (cacheLevel) {
                case 2: reverse.put(ciphered, s2);
                case 1: table.put(s2, ciphered);
            }
        }
        char[] chars = ciphered.toCharArray();
        // Lu is the upper case letter category in Unicode; we're using regexodus to check case because GWT won't
        // respect unicode case data on its own (see
        // https://github.com/gwtproject/gwt/blob/2.6.1/user/super/com/google/gwt/emul/java/lang/Character.java#L54-L61
        // ). We do use GWT's emulated Character.toUpperCase() to capitalize, though, which appears to work in
        // practice and the docs agree.
        if(Category.Lu.contains(source.charAt(0)))
            chars[0] = Character.toUpperCase(chars[0]);
        if(source.length() > 1 && Category.Lu.contains(source.charAt(1))) {
            for (int i = 1; i < chars.length; i++) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
        return new String(chars);
    }

    /**
     * Given a String that should contain words in the source language, this translates each word to the fake language,
     * using existing translations if previous calls to cipher() or lookup() had translated that word.
     * @param text a String that contains words in the source language
     * @return a String of the translated text.
     */
    public String cipher(String text)
    {
        Replacer rep = wordPattern.replacer(new CipherSubstitution());
        return rep.replace(text.replace('-', '\u2013'));
    }

    private class CipherSubstitution implements Substitution
    {
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            dest.append(lookup(match.group(0)));
        }
    }
    private static class DecipherSubstition implements Substitution
    {
        private final Map<String, String> vocabulary;
        DecipherSubstition(final Map<String, String> vocabulary)
        {
            this.vocabulary = vocabulary;
        }
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            String translated = match.group(0);
            if(translated == null) {
                return;
            }
            translated = translated.toLowerCase();
            translated = vocabulary.get(translated);
            if(translated == null) {
                dest.append(match.group(0));
                return;
            }
            char[] chars = translated.toCharArray();
            if(Category.Lu.contains(match.charAt(0)))
                chars[0] = Character.toUpperCase(chars[0]);
            if(match.length() > 1 && Category.Lu.contains(match.charAt(1))) {
                for (int i = 1; i < chars.length; i++) {
                    chars[i] = Character.toUpperCase(chars[i]);
                }
            }
            dest.append(chars, 0, chars.length);
        }
    }

    /**
     * Deciphers words in an already-ciphered text with a given String-to-String Map for a vocabulary. This Map could be
     * the reverse field of this NaturalLanguageCipher, which would give a complete translation, or it could be a
     * partially-complete or partially-correct vocabulary of words the player has learned. The vocabulary should
     * typically have entries added using the quick and accurate {@link #learnTranslations(Map, String...)} method,
     * unless you want to add translations one word at a time (then use {@link #learnTranslation(Map, String)}) or you
     * want incorrect or biased translations added (then use {@link #mismatchTranslation(Map, String, String)}). You
     * don't need to use one of these methods if you just pass the whole of the reverse field as a vocabulary, which
     * will translate every word. If making your own vocabulary without the learn methods, the keys need to be
     * lower-case because while regex Patterns can be case-insensitive, the Maps used here are not.
     * @param text a text in the fake language, as a CharSequence such as a String or StringBuilder
     * @param vocabulary a Map of Strings in the fake language to Strings in the source language
     * @return a String of deciphered text that has any words as keys in vocabulary translated to the source language
     */
    public String decipher(CharSequence text, final Map<String, String> vocabulary)
    {
        Pattern pat;
        Replacer rep;
        StringBuilder sb = new StringBuilder(128);
        sb.append("(?:");
        for(String k : vocabulary.keySet())
        {
            sb.append("(?:\\Q").append(k).append("\\E)|");
        }
        sb.deleteCharAt(sb.length() - 1)
                .append(')');

        pat = Pattern.compile("(?<![\\pL\\&-])(?=[\\pL\\&-])" + sb + "(?![\\pL\\&-])", "ui");

        rep = pat.replacer(new DecipherSubstition(vocabulary));
        return rep.replace(text);
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a correct translation for sourceWord.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct a
     * mismatched translation added to vocabulary with mismatchTranslation().
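     * <br>
     * A short sketch ({@code cipher} is some NaturalLanguageCipher and {@code fakeText} is ciphered text you already
     * produced; the map and the word are placeholders for whatever your game tracks):
     * <pre>{@code
     * HashMap<String, String> learned = new HashMap<>();
     * cipher.learnTranslation(learned, "sword");
     * // only words present in learned are translated back; everything else stays in the fake language
     * String partlyReadable = cipher.decipher(fakeText, learned);
     * }</pre>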
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWord a word in the source language, typically English; the meaning will be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslation(Map<String, String> vocabulary, String sourceWord)
    {
        vocabulary.put(lookup(sourceWord.toLowerCase()), sourceWord);
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct
     * mismatched translations added to vocabulary with mismatchTranslation().
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an array or vararg of words in the source language, typically English; their meanings will
     *                    be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, String... sourceWords)
    {
        for (int i = 0; i < sourceWords.length; i++) {
            learnTranslation(vocabulary, sourceWords[i]);
        }
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct
     * mismatched translations added to vocabulary with mismatchTranslation().
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an Iterable of words in the source language, typically English; their meanings will be
     *                    "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, Iterable<String> sourceWords)
    {
        for (String s : sourceWords) {
            learnTranslation(vocabulary, s);
        }
        return this;
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a typically-incorrect translation for
     * correctWord where it provides mismatchWord instead when the ciphered version of correctWord appears.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. You can use learnTranslation()
     * to correct a mismatched vocabulary word, or mismatchTranslation() again to change the mismatched word.
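     * <br>
     * For example (a sketch; {@code vocab} stands for whatever Map holds the player's known words):
     * <pre>{@code
     * // the player mis-learned a word: deciphered text will now show "axe" wherever "sword" was ciphered
     * cipher.mismatchTranslation(vocab, "sword", "axe");
     * }</pre>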
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param correctWord a word in the source language, typically English; where the ciphered version of this
     *                    appears and the text is deciphered, mismatchWord will be used instead
     * @param mismatchWord a String that will be used for deciphering in place of the translation of correctWord.
     * @return this, for chaining
     */
    public NaturalLanguageCipher mismatchTranslation(Map<String, String> vocabulary, String correctWord, String mismatchWord)
    {
        vocabulary.put(lookup(correctWord.toLowerCase()), mismatchWord);
        return this;
    }

    public int getCacheLevel() {
        return cacheLevel;
    }

    public void setCacheLevel(int cacheLevel) {
        if(cacheLevel >= 2) this.cacheLevel = 2;
        else this.cacheLevel = Math.max(cacheLevel, 0);
    }

    protected Matcher markupMatcher = Pattern.compile("\\[\\?\\](.*?)(?:\\[\\?\\]|$)").matcher();

    private class BulkCipherSubstitution implements Substitution
    {
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            if(match instanceof Matcher)
            {
                wordMatcher.setTarget((Matcher)match, 1);
            }
            else
            {
                wordMatcher.setTarget(match.targetChars(), match.start(1) + match.targetStart(), match.length(1));
            }
            while (wordMatcher.find())
            {
                wordMatcher.getGroup(MatchResult.PREFIX, dest);
                dest.append(lookup(wordMatcher.group()));
                wordMatcher.setTarget(wordMatcher, MatchResult.SUFFIX);
            }
            wordMatcher.getGroup(MatchResult.TARGET, dest);
        }
    }

    /**
     * Given a String, StringBuilder, or other CharSequence that should contain words in the source language (almost
     * always English, since this only knows English prefixes and suffixes), this finds sections of the text that
     * start and end with a {@code [?]} marker, translates each word between those start/end markers to the fake
     * language, using existing translations if previous calls to cipher() or lookup() had translated that word, and
     * removes the {@code [?]} markup afterwards. This is meant for cases where only some words should be translated,
     * such as (for example) translating "What the [?]heck?[?]" to "What the grug?" or something like it if the language
     * is {@link FakeLanguageGen#GOBLIN}, or "What the xu'oz?" if the language is {@link FakeLanguageGen#DEMONIC}.
     * @param text a CharSequence, such as a String, that contains words in the source language and {@code [?]} markup
     * @return a String of the translated text with markup-surrounded sections translated and markup removed
     */
    public String cipherMarkup(CharSequence text)
    {
        BulkCipherSubstitution cipherSub = new BulkCipherSubstitution();
        markupMatcher.setTarget(text);
        Replacer.StringBuilderBuffer sb = Replacer.wrap(new StringBuilder(text.length() * 5 >>> 2));
        Replacer.replace(markupMatcher, cipherSub, sb);
        return sb.toString();
    }

}