package squidpony;

import regexodus.*;
import squidpony.squidmath.NumberTools;
import squidpony.squidmath.RNG;
import squidpony.squidmath.StatefulRandomness;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * Class that builds up a dictionary of words in an English-language source text to words generated by a
 * {@link FakeLanguageGen}, and can translate a source text to a similarly-punctuated, similarly-capitalized fake text;
 * it will try to use variants on the translation of the same root word when it encounters conjugations of that root
 * word or that root word with common English prefixes/suffixes. Performs basic stemming to separate a root word from
 * prefixes, suffixes, and conjugation changes, then uses a phonetic hash of each such separate section to determine the
 * RNG seed that FakeLanguageGen will use, so the translation is not random (similar-sounding root words with similar
 * length will tend to be similar in the results as well). Can cipher an English text and generate a text with
 * FakeLanguageGen, but also decipher such a generated text with a fully-complete, partially-complete, or
 * partially-incorrect vocabulary.
 * <br>
 * This defaults to caching source-language words to their generated-language word translations in the field table, as
 * well as the reverse translation in reverse. This can be changed to reduce memory usage for large vocabularies with
 * {@code setCacheLevel()}, where it starts at 2 (writing to table and reverse), and can be lowered to 1 (writing to
 * table only) if you don't need reverse to decipher a language easily, or to 0 (writing to neither) if you expect that
 * memory will be at a premium and don't mind re-generating the same word each time it occurs in a source text. If
 * cacheLevel is 1 or less, then this will not check for overlap between previously-generated words (it won't have an
 * easy way to look up previously-generated ones) and so may be impossible to accurately decipher. As an example, one
 * test of level 1 generated "he" as the translation for both "a" and "at", so every time "a" had been ciphered and then
 * deciphered, the reproduced version said "at" instead. This won't happen by default, but the default instead relies on
 * words being entered as inputs to cipher() or lookup() in the same order. If words are entered in two different orders
 * to different runs of the program, they may have different generated results if cacheLevel is 2. One way to handle
 * this is to use cacheLevel 2 and cipher the whole game script, or just the unique words in it (maybe just a large word
 * list, such as <a href="http://wordlist.aspell.net/12dicts/">12dicts</a> ), then serialize the NaturalLanguageCipher
 * for later usage.
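 * <br>
 * A minimal usage sketch; the sentence, variable names, and learned words below are arbitrary examples chosen for
 * this illustration, not taken from the library's tests:
 * <pre>{@code
 * NaturalLanguageCipher cipher = new NaturalLanguageCipher(FakeLanguageGen.GOBLIN);
 * String fake = cipher.cipher("Too many cooks spoil the broth.");
 * // with the default cacheLevel of 2, the reverse map acts as a complete vocabulary
 * String restored = cipher.decipher(fake, cipher.reverse);
 * // a partial vocabulary only translates back the words the player has "learned"
 * HashMap<String, String> learned = new HashMap<>();
 * cipher.learnTranslations(learned, "cooks", "broth");
 * String partlyKnown = cipher.decipher(fake, learned);
 * }</pre>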
 * @author Tommy Ettinger
 * Created by Tommy Ettinger on 5/1/2016.
 */
public class NaturalLanguageCipher implements Serializable{

    private static class SemiRandom implements StatefulRandomness, Serializable{
        private static final long serialVersionUID = 1287835632461186341L;
        public long state;
        SemiRandom()
        {
            state = (long) (Long.MAX_VALUE * (Math.random() * 2.0 - 1.0));
        }
        SemiRandom(long state)
        {
            this.state = state;
        }
        /**
         * Get the current internal state of the StatefulRandomness as a long.
         *
         * @return the current internal state of this object.
         */
        @Override
        public long getState() {
            return state;
        }

        /**
         * Set the current internal state of this StatefulRandomness with a long.
         *
         * @param state a 64-bit long. You should avoid passing 0, even though some implementations can handle that.
         */
        @Override
        public void setState(long state) {
            this.state = state;
        }

        /**
         * Using this method, any algorithm that might use the built-in Java Random
         * can interface with this randomness source.
         *
         * @param bits the number of bits to be returned
         * @return the integer containing the appropriate number of bits
         */
        @Override
        public int next(int bits) {
            return (int) ((state += 0x41041041041041L) & ~(-1L << bits));
        }

        /**
         * Using this method, any algorithm that needs to efficiently generate more
         * than 32 bits of random data can interface with this randomness source.
         * <p>
         * Get a random long between Long.MIN_VALUE and Long.MAX_VALUE (both inclusive).
         *
         * @return a random long between Long.MIN_VALUE and Long.MAX_VALUE (both inclusive)
         */
        @Override
        public long nextLong() {
            return state += 0x41041041041041L;
        }

        public double nextDouble()
        {
            return NumberTools.longBitsToDouble(0x3FFL << 52 | (state += 0x41041041041041L) >>> 12) - 1.0;
        }

        /**
         * Produces a copy of this RandomnessSource that, if next() and/or nextLong() are called on this object and the
         * copy, both will generate the same sequence of random numbers from the point copy() was called. This just
         * needs to copy the state so it isn't shared, usually, and produce a new value with the same exact state.
         *
         * @return a copy of this RandomnessSource
         */
        @Override
        public SemiRandom copy() {
            return new SemiRandom(state);
        }
    }

    private static final long serialVersionUID = 1287835632461186341L;
    /**
     * The FakeLanguageGen this will use to construct words; normally one of the static fields in FakeLanguageGen, a
     * FakeLanguageGen produced by using the {@link FakeLanguageGen#mixAll(Object...)} method of two or more of them, or
     * a random FakeLanguageGen produced by {@link FakeLanguageGen#randomLanguage(long)}. Manually constructing
     * FakeLanguageGen objects isn't easy, and if you decide to do that it's recommended you look at SquidLib's source
     * to see how the existing calls to constructors work.
     */
    public FakeLanguageGen language;
    private SemiRandom rs;
    private RNG rng;

    String pluralSuffix, verbingSuffix, verbedSuffix, verberSuffix, verbationSuffix,
            verbmentSuffix, nounySuffix, nounenSuffix, nounistSuffix, nounismSuffix,
            nounicSuffix, nouniveSuffix, adjectivelySuffix, adjectivestSuffix,
            reverbPrefix, ennounPrefix, preverbPrefix, postverbPrefix,
            proverbPrefix, antiverbPrefix, disnounPrefix;

    private static final long PLURAL = 1L, VERBING = 1L << 1, VERBED = 1L << 2, VERBER = 1L << 3,
            VERBATION = 1L << 4, VERBMENT = 1L << 5, NOUNY = 1L << 6, NOUNEN = 1L << 7, NOUNIST = 1L << 8,
            NOUNISM = 1L << 9, NOUNIC = 1L << 10, NOUNIVE = 1L << 11, ADJECTIVELY = 1L << 12,
            ADJECTIVEST = 1L << 13, REVERB = 1L << 14, PREVERB = 1L << 15, POSTVERB = 1L << 16, ENNOUN = 1L << 17,
            PROVERB = 1L << 18, ANTIVERB = 1L << 19, DISNOUN = 1L << 20;

    /*
    qu->kw
    x->ks
    y->i
    kh->q
    ck->k
    ch->x
    cq->kh
    tx->x
    zh->j
    ge->j
    ew->eu
    eigh->ae
    p[fh]->f
    n([gk])->y$1
    a([bdfjlmnprtvz])e->ae$1
    e([bdjlmnptvz])e->ee$1
    i([bdfjlmnprtvz])e->ai$1
    o([bdfjlmnprtvz])e->oa$1
    u([bdfjlmnprtvz])e->uu$1
    ([bdfgklmnpqrtvwxz])\1+->$1
    ace$->aes
    ece$->ees
    ice$->ais
    oce$->oas
    uce$->uus
    se$->z
    ^[pc]([nts])->$1
    ^fth->t
     */
    private static final Replacer[] preproc = {
            new Replacer(Pattern.compile("([bdfgklmnpqrtvwxz])\\1+"), "$1"),
            new Replacer(Pattern.compile("qu"), "kw", false),
            new Replacer(Pattern.compile("x"), "ks", false),
            new Replacer(Pattern.compile("y"), "i", false),
            new Replacer(Pattern.compile("kh"), "q", false),
            new Replacer(Pattern.compile("ck"), "k", false),
            new Replacer(Pattern.compile("ch"), "x", false),
            new Replacer(Pattern.compile("cq"), "kh", false),
            new Replacer(Pattern.compile("tx"), "x", false),
            new Replacer(Pattern.compile("zh"), "j", false),
            new Replacer(Pattern.compile("ge$"), "j", false),
            new Replacer(Pattern.compile("we$"), "w", false),
            new Replacer(Pattern.compile("ew"), "eu", false),
            new Replacer(Pattern.compile("eigh"), "ae", false),
            new Replacer(Pattern.compile("p[fh]"), "f", false),
            new Replacer(Pattern.compile("nc"), "yk", false),
            new Replacer(Pattern.compile("n([gk])"), "y$1"),
            new Replacer(Pattern.compile("a([bdfjlmnprtvz])e"), "ae$1"),
            new Replacer(Pattern.compile("e([bdjlmnptvz])e"), "ee$1"),
            new Replacer(Pattern.compile("i([bdfjlmnprtz])e"), "ai$1"),
            new Replacer(Pattern.compile("o([bdfjlmnprtvz])e"), "oa$1"),
            new Replacer(Pattern.compile("u([bdfjlmnprtvz])e"), "uu$1"),
            new Replacer(Pattern.compile("ace$"), "aes", false),
            new Replacer(Pattern.compile("ece$"), "ees", false),
            new Replacer(Pattern.compile("ice$"), "ais", false),
            new Replacer(Pattern.compile("oce$"), "oas", false),
            new Replacer(Pattern.compile("uce$"), "uus", false),
            new Replacer(Pattern.compile("se$"), "z", false),
            new Replacer(Pattern.compile("e$"), "", false),
            new Replacer(Pattern.compile("^[pc]([nts])"), "$1"),
            new Replacer(Pattern.compile("^fth"), "t", false),
    }, conjugationProc = { // 17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\2" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])", 17), "$1$2$1$2$3"),
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\3", 17), "$1$2$3$1$3"),
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]{3})" +
                    "(?:[^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)", 17), "$1"),
            new Replacer(Pattern.compile("([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])(?:\\1\\2)+", 17), "$1$2"),
            new Replacer(Pattern.compile("[æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy]([æǽœýÿŷỳy])", 17), "$1"),
            new Replacer(Pattern.compile("q([ùúûüũūŭůűųu])$", 17), "q$1e"),
            new Replacer(Pattern.compile("([ìíîïĩīĭįıi])[ìíîïĩīĭįıi]", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy])[wŵẁẃẅ]$", 17), "$1"),
            new Replacer(Pattern.compile("([ùúûüũūŭůűųu])([òóôõöøōŏőǿo])", 17), "$2$1"),
            new Replacer(Pattern.compile("[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]([æǽœ])", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœ])[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]", 17), "$1"),
            new Replacer(Pattern.compile("([wŵẁẃẅ])[wŵẁẃẅ]", 17), "$1"),
            new Replacer(Pattern.compile("q{2,}", 17), "q")
    };

    static final long[] bigrams = {
//a
            5, 22, 20, 22, 21, 22, 22, 5, 11, 20, 22, 4, 22, 22, 20, 22, 4, 4, 22, 22, 5, 22, 5, 22, 20, 22, 8,
//b
            52, 52, 52, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
//c
            58, 58, 58, 58, 33, 58, 39, 58, 32, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//d
            60, 60, 60, 60, 60, 60, 60, 61, 60, 39, 60, 60, 60, 60, 60, 60, 60, 60, 60, 63, 60, 60, 60, 60, 60, 60, 60,
//e
            19, 6, 18, 6, 19, 6, 6, 7, 19, 6, 6, 6, 6, 6, 18, 6, 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 0,
//f
            42, 42, 42, 42, 42, 42, 42, 43, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
//g
            56, 56, 56, 56, 56, 56, 56, 41, 56, 56, 56, 56, 56, 51, 56, 56, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 56,
//h
            24, 53, 59, 61, 24, 43, 57, 24, 24, 39, 59, 31, 49, 51, 24, 55, 47, 29, 33, 63, 24, 41, 27, 47, 51, 35, 0,
//i
            16, 16, 16, 16, 11, 16, 16, 17, 11, 16, 16, 16, 16, 16, 18, 16, 16, 2, 16, 16, 16, 16, 16, 16, 18, 16, 18,
//j
            38, 38, 38, 38, 38, 38, 38, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
//k
            58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//l
            30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
//m
            48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
//n
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 50, 50,
//o
            14, 4, 4, 4, 14, 4, 4, 15, 2, 4, 4, 14, 4, 4, 2, 4, 4, 14, 4, 4, 12, 4, 14, 4, 4, 4, 14,
//p
            54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
//q
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46,
//r
            28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28,
//s
            32, 32, 32, 34, 32, 32, 32, 37, 32, 32, 32, 32, 34, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 34, 34,
//t
            62, 62, 62, 63, 62, 62, 62, 45, 62, 62, 62, 58, 62, 62, 62, 62, 62, 62, 62, 63, 62, 62, 62, 47, 62, 62, 62,
//u
            26, 8, 8, 8, 12, 8, 8, 9, 26, 8, 8, 2, 8, 8, 2, 8, 8, 2, 8, 8, 13, 8, 13, 8, 8, 8, 12,
//v
            40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40,
//w
            26, 53, 59, 61, 26, 43, 57, 24, 26, 39, 59, 31, 49, 51, 26, 55, 47, 29, 35, 63, 24, 41, 27, 47, 51, 35, 0,
//x
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46,
//y
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 46, 50, 51, 50, 50,
//z
            34, 34, 34, 34, 34, 34, 34, 39, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 34, 34, 34, 34, 34, 34, 34, 34,
    };


    // not an OrderedMap because this should never need a random element to be requested
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are in the source language and values
     * are generated by language.
     */
    public HashMap<String, String> table,
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are generated by language and values
     * are in the source language. Can be used as a complete vocabulary when passed to decipher.
     */
    reverse;
    private static final Pattern wordPattern = Pattern.compile("(\\pL+)|(\\pL[\\pL-]*\\pL)");
    private static final Matcher wordMatcher = wordPattern.matcher();

    /**
     * The degree of vocabulary to cache to speed up future searches at the expense of memory usage.
     * <ul>
     * <li>2 will cache source words to generated words in table, and generated to source in reverse.</li>
     * <li>1 will cache source words to generated words in table, and won't write to reverse.</li>
     * <li>0 won't write to table or reverse.</li>
     * </ul>
     * Defaults to 2, writing to both table and reverse.
     */
    public int cacheLevel = 2;

    public long shift;

    /**
     * Constructs a NaturalLanguageCipher that will generate simplified English-like text by default (this uses
     * {@link FakeLanguageGen#SIMPLISH}).
     */
    public NaturalLanguageCipher()
    {
        this(FakeLanguageGen.SIMPLISH);
    }

    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     */
    public NaturalLanguageCipher(FakeLanguageGen language)
    {
        this(language, 0);
    }

    private Pattern[] additionalPrefixChecks = {
            //17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            Pattern.compile("(?:(?:[pрρ][hн])|[fd])[aаαiτιuμυνv]$", 17),
            Pattern.compile("[kкκcсςq][uμυνv]$", 17),
            Pattern.compile("[bъыбвβЪЫБ][iτι][tтτг]$", 17),
            Pattern.compile("[sξζzcсς](?:[hн]?)[iτιyуλγУ]$", 17),
            Pattern.compile("[aаαΛ][nи][aаαΛiτιyуλγУuμυνvoоюσο]*$", 17),
            Pattern.compile("[tтτΓг][iτιyуλγУ]+$", 17),
            Pattern.compile("[cсςkкκq][lι]?[iτιyуλγУ]+$", 17),
            Pattern.compile("[aаαΛ][sξζz]$", 17),
            Pattern.compile("[nиfvν][iτιyуλγУaаαΛ]+$", 17),
            Pattern.compile("[pрρ][eезξεЗΣoоюσοiτιyуλγУuμυνv]+$", 17),
            Pattern.compile("[g][hн]?[aаαΛeезξεЗΣyуλγУ]+$", 17),
            Pattern.compile("[wψшщuμυνv](?:[hн]?)[aаαΛeезξεЗΣoоюσοuμυνv]+$", 17),
    }, additionalSuffixChecks = {
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]*)(?:[nи]+)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]+)(?:[nи]*)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[iτιyуλγУaаαΛ]*)[gj]", 17),
            Pattern.compile("^[nи]..?[Ssξlιζz]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][dtтτΓг]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][kкκcсςq][kкκcсςq]", 17),
            Pattern.compile("^[uμυ]*[mм]", 17),
    };

    private String addPart(String original, int syllables)
    {
        String done;
        Pattern[] checks = null;
        if(original.endsWith("-"))
        {
            checks = additionalPrefixChecks;
        }
        else if(original.startsWith("-"))
        {
            checks = additionalSuffixChecks;
        }
        //syllables <<= 1;
        do {
            done = language.word(rng, false, syllables, checks);
            if(cacheLevel < 2 || ++syllables > 5)
                break;
        }while(reverse.containsKey(done));
        switch (cacheLevel) {
            case 2: reverse.put(done, original);
            case 1: table.put(original, done);
        }
        return done;
    }

    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text, using
     * the specified {@code shift} as a long to modify the generated words from the language's normal results.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     * @param shift any long; this will be used to alter the specific words generated unless it is 0
     */
    public NaturalLanguageCipher(FakeLanguageGen language, long shift)
    {
        rs = new SemiRandom(0xDF58476D1CE4E5B9L + shift);
        rng = new RNG(rs);
        table = new HashMap<>(512, 0.375f);
        reverse = new HashMap<>(512, 0.375f);
        initialize(language, shift);
    }

    /**
     * Changes the language this can cipher, clearing its known translations (if any) and using the given FakeLanguageGen
     * and shift as if given to {@link #NaturalLanguageCipher(FakeLanguageGen, long)}.
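     * <br>
     * For example, to keep one NaturalLanguageCipher around but swap its output style (a sketch; the shift value
     * here is an arbitrary choice):
     * <pre>{@code
     * cipher.initialize(FakeLanguageGen.DEMONIC, 42L);
     * }</pre>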
     * @param language the FakeLanguageGen to change to
     * @param shift any long; this will be used to alter the specific words generated unless it is 0
     * @return this for chaining
     */
    public NaturalLanguageCipher initialize(FakeLanguageGen language, long shift)
    {
        rs.state = 0xDF58476D1CE4E5B9L + shift;
        this.shift = shift;
        this.language = language.copy();
        table.clear();
        reverse.clear();
        pluralSuffix = addPart("-s", 0);
        nounySuffix = addPart("-y", 0);
        nounicSuffix = addPart("-ic", 0);
        nouniveSuffix = addPart("-ive", 0);
        nounistSuffix = addPart("-ist", 0);
        nounismSuffix = addPart("-ism", 1 + (rng.nextSignedInt(3) >> 1));
        nounenSuffix = addPart("-en", 0);
        verbedSuffix = addPart("-ed", 0);
        verberSuffix = addPart("-er", 0);
        verbingSuffix = addPart("-ing", 1);
        verbmentSuffix = addPart("-ment", 0);
        verbationSuffix = addPart("-ation", rng.nextSignedInt(2) + 1);
        adjectivelySuffix = addPart("-ly", 0);
        adjectivestSuffix = addPart("-est", 0);
        reverbPrefix = addPart("re-", 0);
        ennounPrefix = addPart("en-", 0);
        preverbPrefix = addPart("pre-", 0);
        proverbPrefix = addPart("pro-", 0);
        postverbPrefix = addPart("post-", 0);
        antiverbPrefix = addPart("anti-", 2 - (rng.nextSignedInt(3) >> 1));
        disnounPrefix = addPart("dis-", 0);
        table.clear();
        reverse.clear();
        return this;
    }


    /**
     * Copies another NaturalLanguageCipher and constructs this one with the information in the other. Copies the dictionary
     * of known words/prefixes/suffixes/conjugations, as well as the FakeLanguageGen style and everything else.
     * @param other a previously-constructed NaturalLanguageCipher.
     */
    public NaturalLanguageCipher(NaturalLanguageCipher other)
    {
        language = other.language.copy();
        rs = other.rs.copy();
        rng = new RNG(rs);
        table = new HashMap<>(other.table.size(), 0.375f);
        table.putAll(other.table);
        reverse = new HashMap<>(other.reverse.size(), 0.375f);
        reverse.putAll(other.reverse);
        shift = other.shift;
        pluralSuffix = other.pluralSuffix;
        nounySuffix = other.nounySuffix;
        nounicSuffix = other.nounicSuffix;
        nouniveSuffix = other.nouniveSuffix;
        nounistSuffix = other.nounistSuffix;
        nounismSuffix = other.nounismSuffix;
        nounenSuffix = other.nounenSuffix;
        verbedSuffix = other.verbedSuffix;
        verberSuffix = other.verberSuffix;
        verbingSuffix = other.verbingSuffix;
        verbmentSuffix = other.verbmentSuffix;
        verbationSuffix = other.verbationSuffix;
        adjectivelySuffix = other.adjectivelySuffix;
        adjectivestSuffix = other.adjectivestSuffix;
        reverbPrefix = other.reverbPrefix;
        ennounPrefix = other.ennounPrefix;
        preverbPrefix = other.preverbPrefix;
        postverbPrefix = other.postverbPrefix;
        proverbPrefix = other.proverbPrefix;
        antiverbPrefix = other.antiverbPrefix;
        disnounPrefix = other.disnounPrefix;
    }

    /**
     * Gets a phonetic hash of a section of {@code data} between {@code start} inclusive and {@code end} exclusive; this
     * 64-bit hash should be similar for similar-sounding words, rather than wildly different whenever the words differ
     * at all. The algorithm is conceptually related to a locality-sensitive hash, and is inspired by
     * <a href="https://github.com/ticki/eudex">Eudex</a>; like Eudex, the Hamming distance between the hashes of two
     * similar words should be low, even if the values are very different on a number line. The input to this must
     * contain lower-case ASCII letters, since that is all this knows how to read (characters not between 'a' and 'z'
     * are ignored). In NaturalLanguageCipher, the hashes this produces are given as seeds to an
     * intentionally-low-quality RandomnessSource that produces similar results for similar input states, which makes
     * it likely to generate output words that are similar to each other when the input words are similar to each other.
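     * <br>
     * A small illustrative call (the word and variable names are arbitrary, chosen only for this sketch):
     * <pre>{@code
     * char[] letters = "gnome".toCharArray();
     * long hash = NaturalLanguageCipher.phoneticHash64(letters, 0, letters.length);
     * // a related word such as "gnomes" should produce a hash with a low Hamming distance to this one
     * }</pre>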
     * @param data a char array that should contain letters from 'a' to 'z' this can hash
     * @param start the starting position in data to read, inclusive
     * @param end the end position in data to stop reading at, exclusive
     * @return a 64-bit long hash that should have a low Hamming distance to phonetic hashes of similar words
     */
    public static long phoneticHash64(char[] data, int start, int end)
    {
        if(data == null || end <= start || start >= data.length)
            return 0L;
        int current, next, count = 0, used = 0;
        long got, vc = 0, h = 0L;
        boolean vowelStream = false;
        for (int i = start; i < end && count < 10; i++, count++) { // && vc < 7
            current = data[i] - 'a';
            if(current < 0 || current > 25) // skip anything that is not a lower-case ASCII letter
                continue;
            if(i + 1 < end) {
                if((next = data[i + 1] - 'a') < 0 || next > 26)
                    continue;
            }
            else
                next = 26;
            got = bigrams[27 * current + next];
            if(got == 0)
                continue;
            h <<= 6;
            //b <<= 3;
            got = bigrams[27 * current + next];
            i += got & 1L;
            h |= got >>= 1;
            used++;
            //used += 5;
            if(count == 0) {
                vowelStream = got > 0 && got < 12;
            }else if (vowelStream != (got > 0 && got < 12)) {
                vc += vowelStream ? 1 : 0;
                vowelStream = !vowelStream;
            }
            //b |= (got >> 2);
        }
        vc += vowelStream ? 1 : 0;

        if(used > 0 && count > 0) {
            got = h;
            for (; count < 11; count += used) {
                h |= got << (6 * count);
            }
            h &= 0xFFFFFFFFFFFFFFFL; // 60 bits
        }
        /*b &= ~(-1 << (35-Math.min(used, 35)));
        if(used <= 20)
            b ^= b << 8;
        h ^= ((vc & 7L) << 39) | (b << (used + 3));
        */
        vc = Math.max(1L, vc);
        return h | ((vc & 15L) << 60);
    }

    private String conjugate(String data, long mods)
    {
        if(data == null)
            return "";
        StringBuilder sb = new StringBuilder(data);

        if((mods & ENNOUN) != 0)
        {
            sb.insert(0, ennounPrefix);
        }
        if((mods & DISNOUN) != 0)
        {
            sb.insert(0, disnounPrefix);
        }
        if((mods & REVERB) != 0)
        {
            sb.insert(0, reverbPrefix);
        }
        if((mods & ANTIVERB) != 0)
        {
            sb.insert(0, antiverbPrefix);
        }
        if((mods & PROVERB) != 0)
        {
            sb.insert(0, proverbPrefix);
        }
        if((mods & POSTVERB) != 0)
        {
            sb.insert(0, postverbPrefix);
        }
        if((mods & PREVERB) != 0)
        {
            sb.insert(0, preverbPrefix);
        }
        if((mods & NOUNEN) != 0) {
            sb.append(nounenSuffix);
        }
        if((mods & VERBER) != 0) {
            sb.append(verberSuffix);
        }
        if((mods & VERBMENT) != 0) {
            sb.append(verbmentSuffix);
        }
        if((mods & VERBATION) != 0) {
            sb.append(verbationSuffix);
        }
        if((mods & NOUNIVE) != 0) {
            sb.append(nouniveSuffix);
        }
        if((mods & NOUNISM) != 0) {
            sb.append(nounismSuffix);
        }
        if((mods & NOUNIST) != 0) {
            sb.append(nounistSuffix);
        }
        if((mods & NOUNIC) != 0) {
            sb.append(nounicSuffix);
        }
        if((mods & ADJECTIVEST) != 0) {
            sb.append(adjectivestSuffix);
        }
        if((mods & VERBED) != 0) {
            sb.append(verbedSuffix);
        }
        if((mods & VERBING) != 0) {
            sb.append(verbingSuffix);
        }
        if((mods & NOUNY) != 0) {
            sb.append(nounySuffix);
        }
        if((mods & ADJECTIVELY) != 0) {
            sb.append(adjectivelySuffix);
        }
        if((mods & PLURAL) != 0) {
            sb.append(pluralSuffix);
        }
        String done = sb.toString();
        for(int conproc = 0; conproc < conjugationProc.length; conproc++)
        {
            done = conjugationProc[conproc].replace(done);
        }
        return done;
    }
    /**
     * Given a word in the source language (usually English), looks up an existing translation for that word, or if none
     * exists, generates a new word based on the phonetic hash of the source word, any of its stemming information such
     * as prefixes or suffixes, and this NaturalLanguageCipher's FakeLanguageGen.
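     * <br>
     * A minimal sketch of the intended behavior, given some NaturalLanguageCipher named {@code cipher} (the words
     * here are arbitrary examples):
     * <pre>{@code
     * String root = cipher.lookup("dragon");
     * String related = cipher.lookup("dragons"); // usually shares the root of the previous result, plus a suffix
     * }</pre>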
     * @param source a word in the source language
     * @return a word in the fake language
     */
    public String lookup(String source)
    {
        if(source == null || source.isEmpty())
            return "";
        String s2 = source.toLowerCase(), ciphered;
        if(table.containsKey(s2))
            ciphered = table.get(s2);
        else {
            CharSequence altered = FakeLanguageGen.removeAccents(s2);
            for (int i = 0; i < preproc.length; i++) {
                altered = preproc[i].replace(altered);
            }

            char[] sc = ((String)altered).toCharArray(), scO = s2.toCharArray();
            int start = 0, end = sc.length, endO = scO.length;
            long mods = 0;
            /*
            boolean plural = false, verbing = false, verbed = false, verber = false, verbation = false,
                    verbment = false, nouny = false, nounen = false, nounist = false, nounism = false,
                    nounic = false, nounive = false, adjectively = false, adjectivest = false,
                    //prefixes
                    reverb = false, ennoun = false, preverb = false, postverb = false,
                    proverb = false, antiverb = false, disnoun = false;
            */
            if(end >= 4 && endO >= 4 && sc[end-1]=='s' && sc[end-2]!='s') // checking for extra 's' helps singular nouns like "dress" and "princess"
            {
                mods |= PLURAL;
                end--;
                endO--;
                if(scO[endO-1] == 'e')
                {
                    end--;
                    endO--;
                }
            }
            if(end >= 5 && endO >= 5 && sc[end - 2] == 'l' && sc[end-1] == 'y')
            {
                mods |= ADJECTIVELY;
                end -= 2;
                endO -= 2;
            }
            /*
            else if(end >= 4 && endO >= 4 && scO[endO-1] == 'y')
            {
                mods |= NOUNY;
                end--;
                endO--;
            }*/
            if(end >= 5 && endO >= 5 && scO[endO-3] == 'i' && scO[endO-2] == 'n' && scO[endO-1]=='g')
            {
                mods |= VERBING;
                end -= 3;
                endO -= 3;
            }
            if(end >= 4 && endO >= 4 && (scO[endO-3] == 'a' || scO[endO-3] == 'o') && scO[endO-2] == 'd' && scO[endO-1]=='e')
            {
                mods |= VERBED;
                end -= 3;
                endO -= 3;
            }
            else if(end >= 4 && endO >= 4 && scO[endO-2] == 'e' && scO[endO-1] == 'd')
            {
                mods |= VERBED;
                end -= 2;
                endO -= 2;
            }
            else if(end >= 5 && endO >= 5 && sc[end - 3] == 'e' && sc[end - 2] == 's' && sc[end-1] == 't')
            {
                mods |= ADJECTIVEST;
                end -= 3;
                endO -= 3;
            }
            if(end >= 5 && endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'c')
            {
                mods |= NOUNIC;
                end -= 2;
                endO -= 2;
            }
            else if(end >= 6 && endO >= 6 && scO[endO-3] == 'i' && scO[endO-2] == 'v' && scO[endO-1] == 'e') {
                mods |= NOUNIVE;
                end -= 3;
                endO -= 3;
                if (end >= 4 && endO >= 4 && (scO[endO - 2] == 'a' || scO[endO - 2] == 'i') && scO[endO - 1] == 't') {
                    end -= 2;
                    endO -= 2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 't')
            {
                mods |= NOUNIST;
                end -= 3;
                endO -= 3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end -= 2;
                    endO -= 2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 'm')
            {
                mods |= NOUNISM;
                end -= 3;
                endO -= 3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end -= 2;
                    endO -= 2;
                }

            }
            if(end >= 8 && endO >= 8 && (scO[endO - 4] == 't' || scO[endO - 4] == 's' || scO[endO - 4] == 'c') && scO[endO-3] == 'i' && scO[endO-2] == 'o' && scO[endO-1]=='n')
            {
                mods |= VERBATION;
                end -= 4;
                endO -= 4;
            }
            if(end >= 6 && sc[end-4] == 'm' && sc[end-3] == 'e' && sc[end-2] == 'n' && sc[end-1] == 't')
            {
                mods |= VERBMENT;
                end -= 4;
                endO -= 4;
            }
            if(end >= 7 && endO >= 7 && scO[endO-3] == 'i' && scO[endO-2] == 'a' && scO[endO-1]=='n')
            {
                mods |= VERBER;
                end -= 3;
            }
            else if(end >= 4 && endO >= 4 && (sc[end-2] == 'e' || sc[end-2] == 'o') && sc[end-1] == 'r')
            {
                mods |= VERBER;
                end -= 2;
            }
            if(end >= 4 && sc[end-2] == 'e' && sc[end-1]=='n')
            {
                mods |= NOUNEN;
                end -= 2;
            }
            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'e')
            {
                mods |= PREVERB;
                start += 3;
            }
            if(end - start >= 6 && sc[start] == 'p' && sc[start+1] == 'o' && sc[start+2] == 's' && sc[start+3] == 't')
            {
                mods |= POSTVERB;
                start += 4;
            }

            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'o')
            {
                mods |= PROVERB;
                start += 3;
            }
            else {
                if (end - start >= 6 && sc[start] == 'a' && sc[start + 1] == 'n' && sc[start + 2] == 't' && sc[start + 3] == 'i') {
                    mods |= ANTIVERB;
                    start += 4;
                }
                else if (end - start >= 8 && sc[start] == 'c' && sc[start + 1] == 'o' && sc[start + 2] == 'n' && sc[start + 3] == 't' && sc[start + 4] == 'r' && sc[start + 5] == 'a') {
                    mods |= ANTIVERB;
                    start += 6;
                }
            }
            if(end - start >= 4 && sc[start] == 'r' && sc[start+1] == 'e')
            {
                mods |= REVERB;
                start += 2;
            }
            if(end - start >= 5 && sc[start] == 'd' && sc[start+1] == 'i' && sc[start+2] == 's')
            {
                mods |= DISNOUN;
                start += 3;
            }
            if(end - start >= 4 && sc[start] == 'u' && sc[start+1] == 'n')
            {
                mods |= ANTIVERB;
                start += 2;
            }
            if(end - start >= 4 && (sc[start] == 'e' || sc[start] == 'i') && sc[start+1] == 'n')
            {
                mods |= ENNOUN;
                start += 2;
            }
            long h = phoneticHash64(sc, start, end) ^ (shift & 0xFFFFFFFFFFFFFFFL) ^ (shift >>> 14), frustration = 0;
            //System.out.print(source + ":" + ((h >>> 60) & 7) + ":" + StringKit.hex(h) + ", ");
            rs.setState(h);
            do {
                ciphered = conjugate(language.word(rng, false, (int) Math.ceil((h >>> 60) / (0.9 + 0.5 * rng.nextDouble()))), mods);
                if(cacheLevel < 2 || frustration++ > 9)
                    break;
            }while (reverse.containsKey(ciphered));
            switch (cacheLevel) {
                case 2: reverse.put(ciphered, s2);
                case 1: table.put(s2, ciphered);
            }
        }
        char[] chars = ciphered.toCharArray();
        // Lu is the upper case letter category in Unicode; we're using regexodus to check case because GWT won't
        // respect unicode case data on its own (see
        // https://github.com/gwtproject/gwt/blob/2.6.1/user/super/com/google/gwt/emul/java/lang/Character.java#L54-L61
        // ). We do use GWT's emulated Character.toUpperCase() to capitalize, though, which appears to work in
        // practice and the docs agree.
        if(Category.Lu.contains(source.charAt(0)))
            chars[0] = Character.toUpperCase(chars[0]);
        if(source.length() > 1 && Category.Lu.contains(source.charAt(1))) {
            for (int i = 1; i < chars.length; i++) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
        return new String(chars);
    }

    /**
     * Given a String that should contain words in the source language, this translates each word to the fake language,
     * using existing translations if previous calls to cipher() or lookup() had translated that word.
     * @param text a String that contains words in the source language
     * @return a String of the translated text.
     */
    public String cipher(String text)
    {
        Replacer rep = wordPattern.replacer(new CipherSubstitution());
        return rep.replace(text.replace('-', '\u2013'));
    }

    private class CipherSubstitution implements Substitution
    {
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            dest.append(lookup(match.group(0)));
        }
    }
    private static class DecipherSubstition implements Substitution
    {
        private final Map<String, String> vocabulary;
        DecipherSubstition(final Map<String, String> vocabulary)
        {
            this.vocabulary = vocabulary;
        }
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            String translated = match.group(0);
            if(translated == null) {
                return;
            }
            translated = translated.toLowerCase();
            translated = vocabulary.get(translated);
            if(translated == null) {
                dest.append(match.group(0));
                return;
            }
            char[] chars = translated.toCharArray();
            if(Category.Lu.contains(match.charAt(0)))
                chars[0] = Character.toUpperCase(chars[0]);
            if(match.length() > 1 && Category.Lu.contains(match.charAt(1))) {
                for (int i = 1; i < chars.length; i++) {
                    chars[i] = Character.toUpperCase(chars[i]);
                }
            }
            dest.append(chars, 0, chars.length);
        }
    }

    /**
     * Deciphers words in an already-ciphered text with a given String-to-String Map for a vocabulary. This Map could be
     * the reverse field of this NaturalLanguageCipher, which would give a complete translation, or it could be a
     * partially-complete or partially-correct vocabulary of words the player has learned. The vocabulary should
     * typically have entries added using the quick and accurate {@link #learnTranslations(Map, String...)} method,
     * unless you want to add translations one word at a time (then use {@link #learnTranslation(Map, String)}) or you
     * want incorrect or biased translations added (then use {@link #mismatchTranslation(Map, String, String)}). You
     * don't need to use one of these methods if you just pass the whole of the reverse field as a vocabulary, which
     * will translate every word. If making your own vocabulary without the learn methods, the keys need to be
     * lower-case because while regex Patterns can be case-insensitive, the Maps used here are not.
     * @param text a text in the fake language, as a CharSequence such as a String or StringBuilder
     * @param vocabulary a Map of Strings in the fake language to Strings in the source language
     * @return a String of deciphered text that has any words as keys in vocabulary translated to the source language
     */
    public String decipher(CharSequence text, final Map<String, String> vocabulary)
    {
        Pattern pat;
        Replacer rep;
        StringBuilder sb = new StringBuilder(128);
        sb.append("(?:");
        for(String k : vocabulary.keySet())
        {
            sb.append("(?:\\Q").append(k).append("\\E)|");
        }
        sb.deleteCharAt(sb.length() - 1)
                .append(')');

        pat = Pattern.compile("(?<![\\pL\\&-])(?=[\\pL\\&-])" + sb + "(?![\\pL\\&-])", "ui");

        rep = pat.replacer(new DecipherSubstition(vocabulary));
        return rep.replace(text);
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a correct translation for sourceWord.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct a
     * mismatched translation added to vocabulary with mismatchTranslation().
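     * <br>
     * A short sketch ({@code cipher} is some NaturalLanguageCipher and {@code fakeText} is ciphered text you already
     * produced; the map and the word are placeholders for whatever your game tracks):
     * <pre>{@code
     * HashMap<String, String> learned = new HashMap<>();
     * cipher.learnTranslation(learned, "sword");
     * // only words present in learned are translated back; everything else stays in the fake language
     * String partlyReadable = cipher.decipher(fakeText, learned);
     * }</pre>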
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWord a word in the source language, typically English; the meaning will be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslation(Map<String, String> vocabulary, String sourceWord)
    {
        vocabulary.put(lookup(sourceWord.toLowerCase()), sourceWord);
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct
     * mismatched translations added to vocabulary with mismatchTranslation().
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an array or vararg of words in the source language, typically English; their meanings will
     *                    be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, String... sourceWords)
    {
        for (int i = 0; i < sourceWords.length; i++) {
            learnTranslation(vocabulary, sourceWords[i]);
        }
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct
     * mismatched translations added to vocabulary with mismatchTranslation().
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an Iterable of words in the source language, typically English; their meanings will be
     *                    "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, Iterable<String> sourceWords)
    {
        for (String s : sourceWords) {
            learnTranslation(vocabulary, s);
        }
        return this;
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a typically-incorrect translation for
     * correctWord where it provides mismatchWord instead when the ciphered version of correctWord appears.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. You can use learnTranslation()
     * to correct a mismatched vocabulary word, or mismatchTranslation() again to change the mismatched word.
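     * <br>
     * For example (a sketch; {@code vocab} stands for whatever Map holds the player's known words):
     * <pre>{@code
     * // the player mis-learned a word: deciphered text will now show "axe" wherever "sword" was ciphered
     * cipher.mismatchTranslation(vocab, "sword", "axe");
     * }</pre>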
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param correctWord a word in the source language, typically English; where the ciphered version of this
     *                    appears and the text is deciphered, mismatchWord will be used instead
     * @param mismatchWord a String that will be used for deciphering in place of the translation of correctWord.
     * @return this, for chaining
     */
    public NaturalLanguageCipher mismatchTranslation(Map<String, String> vocabulary, String correctWord, String mismatchWord)
    {
        vocabulary.put(lookup(correctWord.toLowerCase()), mismatchWord);
        return this;
    }

    public int getCacheLevel() {
        return cacheLevel;
    }

    public void setCacheLevel(int cacheLevel) {
        if(cacheLevel >= 2) this.cacheLevel = 2;
        else this.cacheLevel = Math.max(cacheLevel, 0);
    }

    protected Matcher markupMatcher = Pattern.compile("\\[\\?\\](.*?)(?:\\[\\?\\]|$)").matcher();

    private class BulkCipherSubstitution implements Substitution
    {
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            if(match instanceof Matcher)
            {
                wordMatcher.setTarget((Matcher)match, 1);
            }
            else
            {
                wordMatcher.setTarget(match.targetChars(), match.start(1) + match.targetStart(), match.length(1));
            }
            while (wordMatcher.find())
            {
                wordMatcher.getGroup(MatchResult.PREFIX, dest);
                dest.append(lookup(wordMatcher.group()));
                wordMatcher.setTarget(wordMatcher, MatchResult.SUFFIX);
            }
            wordMatcher.getGroup(MatchResult.TARGET, dest);
        }
    }

    /**
     * Given a String, StringBuilder, or other CharSequence that should contain words in the source language (almost
     * always English, since this only knows English prefixes and suffixes), this finds sections of the text that
     * start and end with a {@code [?]} marker, translates each word between those start/end markers to the fake
     * language, using existing translations if previous calls to cipher() or lookup() had translated that word, and
     * removes the {@code [?]} markup afterwards. This is meant for cases where only some words should be translated,
     * such as (for example) translating "What the [?]heck?[?]" to "What the grug?" or something like it if the language
     * is {@link FakeLanguageGen#GOBLIN}, or "What the xu'oz?" if the language is {@link FakeLanguageGen#DEMONIC}.
     * @param text a CharSequence, such as a String, that contains words in the source language and {@code [?]} markup
     * @return a String of the translated text with markup-surrounded sections translated and markup removed
     */
    public String cipherMarkup(CharSequence text)
    {
        BulkCipherSubstitution cipherSub = new BulkCipherSubstitution();
        markupMatcher.setTarget(text);
        Replacer.StringBuilderBuffer sb = Replacer.wrap(new StringBuilder(text.length() * 5 >>> 2));
        Replacer.replace(markupMatcher, cipherSub, sb);
        return sb.toString();
    }

}