001package squidpony;
002
003import regexodus.MatchResult;
004import regexodus.Matcher;
005import regexodus.Pattern;
006import regexodus.REFlags;
007import regexodus.Replacer;
008import squidpony.squidmath.*;
009
010import java.io.Serializable;
011import java.util.ArrayList;
012import java.util.Arrays;
013import java.util.Collection;
014import java.util.Collections;
015import java.util.List;
016import java.util.Map;
017import java.util.Set;
018
019/**
020 * A text generator for producing sentences and/or words in nonsense languages that fit a theme. This does not use an
021 * existing word list as a basis for its output, so it may or may not produce existing words occasionally, but you can
022 * safely assume it won't generate a meaningful sentence except in the absolute unlikeliest of cases.
023 * <br>
024 * This supports a lot of language styles in predefined constants. There's a registry of these constants in
025 * {@link #registered} and their names in {@link #registeredNames}, plus the languages that would make sense for
026 * real-world cultures to use (and all use the Latin alphabet, so they can be swapped around) are in
027 * {@link #romanizedHumanLanguages}. You can make a new language with a constructor, but it's pretty time-consuming; the
028 * recommended ways are generating a random language with {@link #randomLanguage(long)} (when you don't care too much
029 * about exactly how it should sound), or blending two or more languages with {@link #mixAll(Object...)} or
030 * {@link #mix(double, FakeLanguageGen, double, Object...)} (when you have a sound in mind that isn't quite met by an
031 * existing language).
032 * <br>
033 * Created by Tommy Ettinger on 11/29/2015.
034 * @see NaturalLanguageCipher NaturalLanguageCipher uses a FakeLanguageGen to reversibly translate English text to nonsense.
035 * @see Thesaurus Thesaurus uses this class a lot to generate things like plant names and the titles of nations.
036 */
037public class FakeLanguageGen implements Serializable {
038    private static final long serialVersionUID = -2396642435461186352L;
039    public final String[] openingVowels, midVowels, openingConsonants, midConsonants, closingConsonants,
040            vowelSplitters, closingSyllables;
041    public final boolean clean;
042    public final double[] syllableFrequencies;
043    protected double totalSyllableFrequency;
044    public final double vowelStartFrequency, vowelEndFrequency, vowelSplitFrequency, syllableEndFrequency;
045    public final Pattern[] sanityChecks;
046    public ArrayList<Modifier> modifiers;
047    public static final GWTRNG srng = new GWTRNG();
048    private static final OrderedMap<String, FakeLanguageGen> registry = new OrderedMap<>(64, Hashers.caseInsensitiveStringHasher);
049    protected String summary;
050    protected String name = "Nameless Language";
051    private static final transient StringBuilder sb = new StringBuilder(20);
052    private static final transient StringBuilder ender = new StringBuilder(12);
053    private static final transient StringBuilder ssb = new StringBuilder(80);
054    /**
055     * A pattern String that will match any vowel FakeLanguageGen can produce out-of-the-box, including Latin, Greek,
056     * and Cyrillic; for use when a String will be interpreted as a regex (as in {@link FakeLanguageGen.Alteration}).
057     */
058    public static final String anyVowel = "[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]",
059    /**
060     * A pattern String that will match one or more of any vowels FakeLanguageGen can produce out-of-the-box, including
061     * Latin, Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in 
062     * {@link FakeLanguageGen.Alteration}).
063     */
064    anyVowelCluster = anyVowel + '+',
065    /**
066     * A pattern String that will match any consonant FakeLanguageGen can produce out-of-the-box, including Latin,
067     * Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in
068     * {@link FakeLanguageGen.Alteration}).
069     */
070    anyConsonant = "[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]",
071    /**
072     * A pattern String that will match one or more of any consonants FakeLanguageGen can produce out-of-the-box,
073     * including Latin, Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in
074     * {@link FakeLanguageGen.Alteration}).
075     */
076    anyConsonantCluster = anyConsonant + '+';
077    protected static final Pattern repeats = Pattern.compile("(.)\\1+"),
078            vowelClusters = Pattern.compile(anyVowelCluster, REFlags.IGNORE_CASE | REFlags.UNICODE),
079            consonantClusters = Pattern.compile(anyConsonantCluster, REFlags.IGNORE_CASE | REFlags.UNICODE);
080    //latin
081    //àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳybcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżž
082    //ÀÁÂÃÄÅÆĀĂĄǺǼAÈÉÊËĒĔĖĘĚEÌÍÎÏĨĪĬĮIIÒÓÔÕÖØŌŎŐŒǾOÙÚÛÜŨŪŬŮŰŲUÝŸŶỲYBCÇĆĈĊČDÞÐĎĐFGĜĞĠĢHĤĦJĴȷKĶLĹĻĽĿŁMNÑŃŅŇŊPQRŔŖŘSŚŜŞŠȘTŢŤȚVWŴẀẂẄXYÝŸŶỲZŹŻŽṚṜḶḸḌṬṄṆṢṂḤ
083    //greek
084    //αοειυρσζτκχνθμπψβλγφξς
085    //ΑΟΕΙΥΡΣΖΤΚΧΝΘΜΠΨΒΛΓΦΞ
086    //cyrillic
087    //аеёийоуъыэюябвгдклпрстфхцжмнзчшщ
088    //АЕЁИЙОУЪЫЭЮЯБВГДКЛПРСТФХЦЖМНЗЧШЩ
089
090    private static final Pattern[]
091            vulgarChecks = new Pattern[]
092            {
093                    //17 is REFlags.UNICODE | REFlags.IGNORE_CASE
094                    Pattern.compile("[sξζzkкκcсς][hнlι].{1,3}[dtтτΓг]", 17),
095                    Pattern.compile("(?:(?:[pрρ][hн])|[fd]).{1,3}[kкκcсςxхжχq]", 17), // lots of these end in a 'k' sound, huh
096                    Pattern.compile("[kкκcсςСQq][uμυνvhн]{1,3}[kкκcсςxхжχqmм]", 17),
097                    Pattern.compile("[bъыбвβЪЫБ].?[iτιyуλγУ].?[cсς]", 17),
098                    Pattern.compile("[hн][^aаαΛeезξεЗΣiτιyуλγУ][^aаαΛeезξεЗΣiτιyуλγУ]?[rяΓ]", 17),
099                    Pattern.compile("[tтτΓгcсς][iτιyуλγУ][tтτΓг]+$", 17),
100                    Pattern.compile("(?:(?:[pрρ][hн])|f)[aаαΛhн]{1,}[rяΓ][tтτΓг]", 17),
101                    Pattern.compile("[Ssξζzcсς][hн][iτιyуλγУ].?[sξζzcсς]", 17),
102                    Pattern.compile("[aаαΛ][nи][aаαΛeезξεЗΣiτιyуλγУoоюσοuμυνv]{1,2}[Ssξlιζz]", 17),
103                    Pattern.compile("[aаαΛ]([sξζz]{2})", 17),
104                    Pattern.compile("[kкκcсςСQq][hн]?[uμυνv]([hн]?)[nи]+[tтτΓг]", 17),
105                    Pattern.compile("[nиfvν]..?[jg]", 17), // might as well remove two possible slurs and a body part with one check
106                    Pattern.compile("[pрρ](?:(?:([eезξεЗΣoоюσοuμυνv])\\1)|(?:[eезξεЗΣiτιyуλγУuμυνv]+[sξζz]))", 17), // the grab bag of juvenile words
107                    Pattern.compile("[mм][hнwψшщ]?..?[rяΓ].?d", 17), // should pick up the #1 obscenity from Spanish and French
108                    Pattern.compile("[g][hн]?[aаαАΑΛeеёзξεЕЁЗΕΣ][yуλγУeеёзξεЕЁЗΕΣ]", 17), // could be inappropriate for random text
109                    Pattern.compile("[wψшщuμυνv](?:[hн]?)[aаαΛeеёзξεЗΕΣoоюσοuμυνv](?:[nи]+)[gkкκcсςxхжχq]", 17)
110            },
111            genericSanityChecks = new Pattern[]
112                    {
113                            Pattern.compile("[AEIOUaeiou]{3}"),
114                            Pattern.compile("(\\p{L})\\1\\1"),
115                            Pattern.compile("[Ii][iyq]"),
116                            Pattern.compile("[Yy]([aiu])\\1"),
117                            Pattern.compile("[Rr][uy]+[rh]"),
118                            Pattern.compile("[Qq]u[yu]"),
119                            Pattern.compile("[^oaei]uch"),
120                            Pattern.compile("[Hh][tcszi]?h"),
121                            Pattern.compile("[Tt]t[^aeiouy]{2}"),
122                            Pattern.compile("[Yy]h([^aeiouy]|$)"),
123                            Pattern.compile("([xqy])\\1$"),
124                            Pattern.compile("[qi]y$"),
125                            Pattern.compile("[szSZrlRL]+?[^aeiouytdfgkcpbmnslrv][rlsz]"),
126                            Pattern.compile("[UIuiYy][wy]"),
127                            Pattern.compile("^[UIui]e"),
128                            Pattern.compile("^([^aeioyl])\\1", 17)
129                    },
130            englishSanityChecks = new Pattern[]
131                    {
132                            Pattern.compile("[AEIOUaeiou]{3}"),
133                            Pattern.compile("(\\w)\\1\\1"),
134                            Pattern.compile("(.)\\1(.)\\2"),
135                            Pattern.compile("[Aa][ae]"),
136                            Pattern.compile("[Uu][umlkj]"),
137                            Pattern.compile("[Ii][iyqkhrl]"),
138                            Pattern.compile("[Oo][c]"),
139                            Pattern.compile("[Yy]([aiu])\\1"),
140                            Pattern.compile("[Rr][aeiouy]+[rh]"),
141                            Pattern.compile("[Qq]u[yu]"),
142                            Pattern.compile("[^oaei]uch"),
143                            Pattern.compile("[Hh][tcszi]?h"),
144                            Pattern.compile("[Tt]t[^aeiouy]{2}"),
145                            Pattern.compile("[Yy]h([^aeiouy]|$)"),
146                            Pattern.compile("[szSZrlRL]+?[^aeiouytdfgkcpbmnslr][rlsz]"),
147                            Pattern.compile("[UIuiYy][wy]"),
148                            Pattern.compile("^[UIui][ae]"),
149                            Pattern.compile("q(?:u?)$")
150                    },
151            japaneseSanityChecks = new Pattern[]
152                    {
153                            Pattern.compile("[AEIOUaeiou]{3}"),
154                            Pattern.compile("(\\w)\\1\\1"),
155                            Pattern.compile("[Tt]s[^u]"),
156                            Pattern.compile("[Ff][^u]"),
157                            Pattern.compile("[Yy][^auo]"),
158                            Pattern.compile("[Tt][ui]"),
159                            Pattern.compile("[SsZzDd]i"),
160                            Pattern.compile("[Hh]u"),
161                    },
162            arabicSanityChecks = new Pattern[]
163                    {
164                            Pattern.compile("(.)\\1\\1"),
165                            Pattern.compile("-[^aeiou](?:[^aeiou]|$)"),
166                    };
167    private static final Replacer[]
168            accentFinders = new Replacer[]
169            {
170                    Pattern.compile("[àáâäăāãåąǻ]").replacer("a"),
171                    Pattern.compile("[èéêëĕēėęě]").replacer("e"),
172                    Pattern.compile("[ìíîïĭīĩįı]").replacer("i"),
173                    Pattern.compile("[òóôöŏōõøőǿ]").replacer("o"),
174                    Pattern.compile("[ùúûüŭūũůűų]").replacer("u"),
175                    Pattern.compile("[æǽ]").replacer("ae"),
176                    Pattern.compile("œ").replacer("oe"),
177                    Pattern.compile("[ÀÁÂÃÄÅĀĂĄǺ]").replacer("A"),
178                    Pattern.compile("[ÈÉÊËĒĔĖĘĚ]").replacer("E"),
179                    Pattern.compile("[ÌÍÎÏĨĪĬĮI]").replacer("I"),
180                    Pattern.compile("[ÒÓÔÕÖØŌŎŐǾ]").replacer("O"),
181                    Pattern.compile("[ÙÚÛÜŨŪŬŮŰŲ]").replacer("U"),
182                    Pattern.compile("[ÆǼ]").replacer("Ae"),
183                    Pattern.compile("Œ").replacer("Oe"),
184                    Pattern.compile("Ё").replacer("Е"),
185                    Pattern.compile("Й").replacer("И"),
186                    Pattern.compile("[çćĉċč]").replacer("c"),
187                    Pattern.compile("[þðďđ]").replacer("d"),
188                    Pattern.compile("[ĝğġģ]").replacer("g"),
189                    Pattern.compile("[ĥħ]").replacer("h"),
190                    Pattern.compile("[ĵȷ]").replacer("j"),
191                    Pattern.compile("ķ").replacer("k"),
192                    Pattern.compile("[ĺļľŀłļ]").replacer("l"),
193                    Pattern.compile("[ñńņňŋ]").replacer("n"),
194                    Pattern.compile("[ŕŗřŗŕ]").replacer("r"),
195                    Pattern.compile("[śŝşšș]").replacer("s"),
196                    Pattern.compile("[ţťŧț]").replacer("t"),
197                    Pattern.compile("[ŵẁẃẅ]").replacer("w"),
198                    Pattern.compile("[ýÿŷỳ]").replacer("y"),
199                    Pattern.compile("[źżž]").replacer("z"),
200                    Pattern.compile("[ÇĆĈĊČ]").replacer("C"),
201                    Pattern.compile("[ÞÐĎĐḌ]").replacer("D"),
202                    Pattern.compile("[ĜĞĠĢ]").replacer("G"),
203                    Pattern.compile("[ĤĦḤ]").replacer("H"),
204                    Pattern.compile("Ĵ").replacer("J"),
205                    Pattern.compile("Ķ").replacer("K"),
206                    Pattern.compile("[ĹĻĽĿŁḶḸĻ]").replacer("L"),
207                    Pattern.compile("Ṃ").replacer("M"),
208                    Pattern.compile("[ÑŃŅŇŊṄṆ]").replacer("N"),
209                    Pattern.compile("[ŔŖŘṚṜŖŔ]").replacer("R"),
210                    Pattern.compile("[ŚŜŞŠȘṢ]").replacer("S"),
211                    Pattern.compile("[ŢŤŦȚṬ]").replacer("T"),
212                    Pattern.compile("[ŴẀẂẄ]").replacer("W"),
213                    Pattern.compile("[ÝŸŶỲ]").replacer("Y"),
214                    Pattern.compile("[ŹŻŽ]").replacer("Z"),
215                    Pattern.compile("ё").replacer("е"),
216                    Pattern.compile("й").replacer("и"),
217            };
218
219    static final char[][] accentedVowels = new char[][]{
220            new char[]{'a', 'à', 'á', 'â', 'ä', 'ā', 'ă', 'ã', 'å', 'ą', 'ǻ'},
221            new char[]{'e', 'è', 'é', 'ê', 'ë', 'ē', 'ĕ', 'ė', 'ę', 'ě'},
222            new char[]{'i', 'ì', 'í', 'î', 'ï', 'ī', 'ĭ', 'ĩ', 'į', 'ı',},
223            new char[]{'o', 'ò', 'ó', 'ô', 'ö', 'ō', 'ŏ', 'õ', 'ø', 'ő', 'ǿ'},
224            new char[]{'u', 'ù', 'ú', 'û', 'ü', 'ū', 'ŭ', 'ũ', 'ů', 'ű', 'ų'}
225    },
226            accentedConsonants = new char[][]
227                    {
228                            new char[]{
229                                    'b'
230                            },
231                            new char[]{
232                                    'c', 'ç', 'ć', 'ĉ', 'ċ', 'č',
233                            },
234                            new char[]{
235                                    'd', 'þ', 'ð', 'ď', 'đ',
236                            },
237                            new char[]{
238                                    'f'
239                            },
240                            new char[]{
241                                    'g', 'ĝ', 'ğ', 'ġ', 'ģ',
242                            },
243                            new char[]{
244                                    'h', 'ĥ', 'ħ',
245                            },
246                            new char[]{
247                                    'j', 'ĵ', 'ȷ',
248                            },
249                            new char[]{
250                                    'k', 'ķ',
251                            },
252                            new char[]{
253                                    'l', 'ĺ', 'ļ', 'ľ', 'ŀ', 'ł',
254                            },
255                            new char[]{
256                                    'm',
257                            },
258                            new char[]{
259                                    'n', 'ñ', 'ń', 'ņ', 'ň', 'ŋ',
260                            },
261                            new char[]{
262                                    'p',
263                            },
264                            new char[]{
265                                    'q',
266                            },
267                            new char[]{
268                                    'r', 'ŕ', 'ŗ', 'ř',
269                            },
270                            new char[]{
271                                    's', 'ś', 'ŝ', 'ş', 'š', 'ș',
272                            },
273                            new char[]{
274                                    't', 'ţ', 'ť', 'ț',
275                            },
276                            new char[]{
277                                    'v',
278                            },
279                            new char[]{
280                                    'w', 'ŵ', 'ẁ', 'ẃ', 'ẅ',
281                            },
282                            new char[]{
283                                    'x',
284                            },
285                            new char[]{
286                                    'y', 'ý', 'ÿ', 'ŷ', 'ỳ',
287                            },
288                            new char[]{
289                                    'z', 'ź', 'ż', 'ž',
290                            },
291                    };
292    private static final OrderedMap<String, String> openVowels,
293            openCons, midCons, closeCons;
294
295    static {
296
297        registry.put("", null);
298        
299        openVowels = Maker.makeOM(
300        "a", "a aa ae ai au ea ia oa ua",
301        "e", "e ae ea ee ei eo eu ie ue",
302        "i", "i ai ei ia ie io iu oi ui",
303        "o", "o eo io oa oi oo ou",
304        "u", "u au eu iu ou ua ue ui");
305
306        openCons = Maker.makeOM(
307                "b", "b bl br by bw bh",
308                "bh", "bh",
309        "c", "c cl cr cz cth sc scl",
310        "ch", "ch ch chw",
311        "d", "d dr dz dy dw dh",
312        "dh", "dh",
313        "f", "f fl fr fy fw sf",
314        "g", "g gl gr gw gy gn",
315        "h", "bh cth ch ch chw dh h hm hy hw kh khl khw ph phl phr sh shl shqu shk shp shm shn shr shw shpl th th thr thl thw",
316        "j", "j j",
317        "k", "k kr kl ky kn sk skl shk",
318        "kh", "kh khl khw",
319        "l", "bl cl fl gl kl khl l pl phl scl skl spl sl shl shpl tl thl vl zl",
320        "m", "hm m mr mw my sm smr shm",
321        "n", "gn kn n nw ny pn sn shn",
322        "p", "p pl pr py pw pn sp spr spl shp shpl ph phl phr",
323        "ph", "ph phl phr",
324        "q", "q",
325        "qu", "qu squ shqu",
326        "r", "br cr dr fr gr kr mr pr phr r str spr smr shr tr thr vr wr zvr",
327        "s", "s sc scl sf sk skl st str sp spr spl sl sm smr sn sw sy squ ts sh shl shqu shk shp shm shn shr shw shpl",
328        "sh", "sh shl shqu shk shp shm shn shr shw shpl",
329        "t", "st str t ts tr tl ty tw tl",
330        "th", "cth th thr thl thw",
331        "tl", "tl",
332        "v", "v vr vy zv zvr vl",
333        "w", "bw chw dw fw gw hw khw mw nw pw sw shw tw thw w wr zw",
334        "x", "x",
335        "y", "by dy fy gy hy ky my ny py sy ty vy y zy",
336        "z", "cz dz z zv zvr zl zy zw");
337
338        midCons = Maker.makeOM(
339                "b", "lb rb bj bl br lbr rbl skbr scbr zb bq bdh dbh bbh lbh rbh bb",
340        "bh", "bbh dbh lbh rbh",
341        "c", "lc lsc rc rsc cl cqu cr ct lcr rcl sctr scdr scbr scpr msc mscr nsc nscr ngscr ndscr cc",
342        "ch", "lch rch rch",
343        "d", "ld ld rd rd skdr scdr dr dr dr rdr ldr zd zdr ndr ndscr ndskr ndst dq ldh rdh dbh bdh ddh dd",
344        "dh", "bdh ddh ldh rdh",
345        "f", "lf rf fl fr fl fr fl fr lfr rfl ft ff",
346        "g", "lg lg rg rg gl gr gl gr gl gr lgr rgl zg zgr ngr ngl ngscr ngskr gq gg",
347        "h", "lch lph lth lsh rch rph rsh rth phl phr lphr rphl shl shr lshr rshl msh mshr zth bbh dbh lbh rbh bdh ddh ldh rdh",
348        "j", "bj lj rj",
349        "k", "lk lsk rk rsk kl kr lkr rkl sktr skdr skbr skpr tk zk zkr msk mskr nsk nskr ngskr ndskr kq kk",
350        "kh", "lkh rkh",
351        "l", "lb lc lch ld lf lg lj lk lm ln lp lph ls lst lt lth lsc lsk lsp lv lz lsh bl lbr rbl cl lcr rcl fl lfr rfl gl lgr rgl kl lkr rkl pl lpr rpl phl lphr rphl shl lshr rshl sl rsl lsl ldr ltr lx ngl nsl msl nsl ll lth tl ltl rtl vl",
352        "m", "lm rm zm msl msc mscr msh mshr mst msp msk mskr mm",
353        "n", "ln rn nx zn zn ndr nj ntr ntr ngr ngl nsl nsl nsc nscr ngscr ndscr nsk nskr ngskr ndskr nst ndst nsp nn",
354        "p", "lp lsp rp rsp pl pr lpr rpl skpr scpr zp msp nsp lph rph phl phr lphr rphl pq pp",
355        "ph", "lph lph rph rph phl phr lphr rphl",
356        "q", "bq dq gq kq pq tq",
357        "qu", "cqu lqu rqu",
358        "r", "rb rc rch rd rf rg rj rk rm rn rp rph rs rsh rst rt rth rsc rsk rsp rv rz br br br lbr rbl cr cr cr lcr rcl fr fr fr lfr rfl gr gr gr lgr rgl kr kr kr lkr rkl pr pr pr lpr rpl phr phr phr lphr rphl shr shr shr lshr rshl rsl sktr sctr skdr scdr skbr scbr skpr scpr dr dr dr rdr ldr tr tr tr rtr ltr vr rx zr zdr ztr zgr zkr ntr ntr ndr ngr mscr mshr mskr nscr ngscr ndscr nskr ngskr ndskr rr",
359        "s", "ls lst lsc lsk lsp rs rst rsc rsk rsp sl rsl lsl sktr sctr skdr scdr skbr scbr skpr scpr nsl msl msc mscr mst msp msk mskr nsl nsc nscr ngscr ndscr nsk nskr ngskr ndskr nst ndst nsp lsh rsh sh shl shqu shk shp shm shn shr shw shpl lshr rshl msh mshr ss",
360        "sh", "lsh rsh sh shl shqu shk shp shm shn shr shw shpl lshr rshl msh mshr",
361        "t", "ct ft lst lt rst rt sktr sctr tk tr rtr ltr zt ztr ntr ntr mst nst ndst tq ltl rtl tt",
362        "th", "lth rth zth cth",
363        "tl", "ltl rtl",
364        "v", "lv rv vv vl vr",
365        "w", "bw chw dw fw gw hw khw mw nw pw sw shw tw thw w wr wy zw",
366        "x", "nx rx lx",
367        "y", "by dy fy gy hy ky my ny py sy ty vy wy zy",
368        "z", "lz rz zn zd zt zg zk zm zn zp zb zr zdr ztr zgr zkr zth zz");
369
370        closeCons = Maker.makeOM("b", "b lb rb bs bz mb mbs bh bh lbh rbh mbh bb",
371        "bh", "bh lbh rbh mbh",
372        "c", "c ck cks lc rc cs cz ct cz cth sc",
373        "ch", "ch lch rch tch pch kch mch nch",
374        "d", "d ld rd ds dz dt dsh dth gd nd nds dh dh ldh rdh ndh dd",
375        "dh", "dh ldh rdh ndh",
376        "f", "f lf rf fs fz ft fsh ft fth ff",
377        "g", "g lg rg gs gz gd gsh gth ng ngs gg",
378        "h", "cth ch lch rch tch pch kch mch nch dsh dth fsh fth gsh gth h hs ksh kth psh pth ph ph ph ph ph ph lph rph phs pht phth",
379        "j", "j",
380        "k", "ck cks kch k lk rk ks kz kt ksh kth nk nks sk",
381        "kh", "kh",
382        "l", "lb lc lch ld lf lg lk l ls lz lp lph ll",
383        "m", "mch m ms mb mt mp mbs mps mz sm mm",
384        "n", "nch n ns nd nt nk nds nks nz ng ngs nn",
385        "p", "pch mp mps p lp rp ps pz pt psh pth sp sp ph lph rph phs pht phth",
386        "ph", "ph lph rph phs pht phth",
387        "q", "q",
388        "qu", "",
389        "r", "rb rc rch rd rf rg rk rp rph r rs rz",
390        "s", "bs cks cs ds fs gs hs ks ls ms mbs mps ns nds nks ngs ps phs rs s st sp st sp sc sk sm ts lsh rsh sh shk shp msh ss",
391        "sh", "lsh rsh sh shk shp msh",
392        "t", "ct ft tch dt ft kt mt nt pt pht st st t ts tz tt",
393        "th", "cth dth fth gth kth pth phth th ths",
394        "tl", "tl",
395        "v", "v",
396        "w", "",
397        "x", "x",
398        "y", "",
399        "z", "bz cz dz fz gz kz lz mz nz pz rz tz z zz");
400    }
401
402    /*
403     * Removes accented characters from a string; if the "base" characters are non-English anyway then the result won't
404     * be an ASCII string, but otherwise it probably will be.
405     * <br>
406     * Credit to user hashable from http://stackoverflow.com/a/1215117
407     *
408     * @param str a string that may contain accented characters
409     * @return a string with all accented characters replaced with their (possibly ASCII) counterparts
410     *
411    public String removeAccents(String str) {
412        String alteredString = Normalizer.normalize(str, Normalizer.Form.NFD);
413        alteredString = diacritics.matcher(alteredString).replaceAll("");
414        alteredString = alteredString.replace('æ', 'a');
415        alteredString = alteredString.replace('œ', 'o');
416        alteredString = alteredString.replace('Æ', 'A');
417        alteredString = alteredString.replace('Œ', 'O');
418        return alteredString;
419    }*/
420
421    /**
422     * Removes accented Latin-script characters from a string; if the "base" characters are non-English anyway then the
423     * result won't be an ASCII string, but otherwise it probably will be.
424     *
425     * @param str a string that may contain accented Latin-script characters
426     * @return a string with all accented characters replaced with their (possibly ASCII) counterparts
427     */
428    public static CharSequence removeAccents(CharSequence str) {
429        CharSequence alteredString = str;
430        for (int i = 0; i < accentFinders.length; i++) {
431            alteredString = accentFinders[i].replace(alteredString);
432        }
433        return alteredString;
434    }
435
436    private FakeLanguageGen register(String languageName) {
437        summary = registry.size() + "@1";
438        registry.put(languageName,this);
439        name = languageName;
440        return copy();
441    }
442
443    private FakeLanguageGen summarize(String brief) {
444        summary = brief;
445        return this;
446    }
447
448    private static FakeLanguageGen lovecraft() {
449        return new FakeLanguageGen(
450                new String[]{"a", "i", "o", "e", "u", "a", "i", "o", "e", "u", "ia", "ai", "aa", "ei"},
451                new String[]{},
452                new String[]{"s", "t", "k", "n", "y", "p", "k", "l", "g", "gl", "th", "sh", "ny", "ft", "hm", "zvr", "cth"},
453                new String[]{"h", "gl", "gr", "nd", "mr", "vr", "kr"},
454                new String[]{"l", "p", "s", "t", "n", "k", "g", "x", "rl", "th", "gg", "gh", "ts", "lt", "rk", "kh", "sh", "ng", "shk"},
455                new String[]{"aghn", "ulhu", "urath", "oigor", "alos", "'yeh", "achtal", "elt", "ikhet", "adzek", "agd"},
456                new String[]{"'", "-"}, new int[]{1, 2, 3}, new double[]{6, 7, 2},
457                0.4, 0.31, 0.07, 0.04, null, true);
458    }
459    /**
460     * Ia! Ia! Cthulhu Rl'yeh ftaghn! Useful for generating cultist ramblings or unreadable occult texts. You may want
461     * to consider mixing this with multiple other languages using {@link #mixAll(Object...)}; using some very different
462     * languages in low amounts relative to the amount used for this, like {@link #NAHUATL}, {@link #INUKTITUT},
463     * {@link #SOMALI}, {@link #DEEP_SPEECH}, and {@link #INSECT} can alter the aesthetic of the generated text in ways
464     * that may help distinguish magic styles.
465     * <br>
466     * Zvrugg pialuk, ya'as irlemrugle'eith iposh hmo-es nyeighi, glikreirk shaivro'ei!
467     */
468    public static final FakeLanguageGen LOVECRAFT = lovecraft().register("Lovecraft");
469    private static FakeLanguageGen english() {
470        return new FakeLanguageGen(
471                new String[]{
472                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
473                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
474                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
475                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
476                        "au", "ai", "ai", "ou", "ea", "ie", "io", "ei",
477                },
478                new String[]{"u", "u", "oa", "oo", "oo", "oo", "ee", "ee", "ee", "ee",},
479                new String[]{
480                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n",
481                        "p", "pl", "pr", "qu", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "thr", "v", "w", "y", "z",
482                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n",
483                        "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y",
484                        "b", "br", "c", "ch", "d", "dr", "f", "g", "h", "j", "l", "m", "n",
485                        "p", "r", "s", "sh", "st", "sl", "t", "tr", "th",
486                        "b", "d", "f", "g", "h", "l", "m", "n",
487                        "p", "r", "s", "sh", "t", "th",
488                        "b", "d", "f", "g", "h", "l", "m", "n",
489                        "p", "r", "s", "sh", "t", "th",
490                        "r", "s", "t", "l", "n",
491                        "str", "spr", "spl", "wr", "kn", "kn", "gn",
492                },
493                new String[]{"x", "cst", "bs", "ff", "lg", "g", "gs",
494                        "ll", "ltr", "mb", "mn", "mm", "ng", "ng", "ngl", "nt", "ns", "nn", "ps", "mbl", "mpr",
495                        "pp", "ppl", "ppr", "rr", "rr", "rr", "rl", "rtn", "ngr", "ss", "sc", "rst", "tt", "tt", "ts", "ltr", "zz"
496                },
497                new String[]{"b", "rb", "bb", "c", "rc", "ld", "d", "ds", "dd", "f", "ff", "lf", "rf", "rg", "gs", "ch", "lch", "rch", "tch",
498                        "ck", "ck", "lk", "rk", "l", "ll", "lm", "m", "rm", "mp", "n", "nk", "nch", "nd", "ng", "ng", "nt", "ns", "lp", "rp",
499                        "p", "r", "rn", "rts", "s", "s", "s", "s", "ss", "ss", "st", "ls", "t", "t", "ts", "w", "wn", "x", "ly", "lly", "z",
500                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "t", "w",
501                },
502                new String[]{"ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
503                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
504                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
505                        "ay", "ay", "ey", "oy", "ay", "ay", "ey", "oy",
506                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
507                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
508                        "ily", "ily", "ily", "adly", "owly", "oorly", "ardly", "iedly",
509                },
510                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{10, 11, 4, 1},
511                0.22, 0.1, 0.0, 0.22, englishSanityChecks, true);
512    }
513    /**
514     * Imitation English; may seem closer to Dutch in some generated text, and is not exactly the best imitation.
515     * Should seem pretty fake to many readers; does not filter out dictionary words but does perform basic vulgarity
516     * filtering. If you want to avoid generating other words, you can subclass FakeLanguageGen and modify word() .
517     * <br>
518     * Mont tiste frot; mousation hauddes?
519     * Lily wrely stiebes; flarrousseal gapestist.
520     */
521    public static final FakeLanguageGen ENGLISH = english().register("English");
522
523    private static FakeLanguageGen greekRomanized(){
524        return new FakeLanguageGen(
525                new String[]{"a", "a", "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "i", "i", "au", "ai", "ai", "oi", "oi",
526                        "ia", "io", "u", "u", "eo", "ei", "o", "o", "ou", "oi", "y", "y", "y", "y"},
527                new String[]{"ui", "ui", "ei", "ei"},
528                new String[]{"rh", "s", "z", "t", "t", "k", "ch", "n", "th", "kth", "m", "p", "ps", "b", "l", "kr",
529                        "g", "phth", "d", "t", "k", "ch", "n", "ph", "ph", "k",},
530                new String[]{"lph", "pl", "l", "l", "kr", "nch", "nx", "ps"},
531                new String[]{"s", "p", "t", "ch", "n", "m", "s", "p", "t", "ch", "n", "m", "b", "g", "st", "rst",
532                        "rt", "sp", "rk", "ph", "x", "z", "nk", "ng", "th", "d", "k", "n", "n",},
533                new String[]{"os", "os", "os", "is", "is", "us", "um", "eum", "ium", "iam", "us", "um", "es",
534                        "anes", "eros", "or", "or", "ophon", "on", "on", "ikon", "otron", "ik",},
535                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 7, 4, 1}, 0.45, 0.45, 0.0, 0.2, null, true);
536    }
537    
538    /**
539     * Imitation ancient Greek, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers.
540     * <br>
541     * Psuilas alor; aipeomarta le liaspa...
542     */
543    public static final FakeLanguageGen GREEK_ROMANIZED = greekRomanized().register("Greek Romanized");
544    private static FakeLanguageGen greekAuthentic(){
545        return new FakeLanguageGen(
546            new String[]{"α", "α", "α", "α", "α", "ο", "ο", "ε", "ε", "ε", "ι", "ι", "ι", "ι", "ι", "αυ", "αι", "αι", "οι", "οι",
547                    "ια", "ιο", "ου", "ου", "εο", "ει", "ω", "ω", "ωυ", "ωι", "υ", "υ", "υ", "υ"},
548            new String[]{"υι", "υι", "ει", "ει"},
549            new String[]{"ρ", "σ", "ζ", "τ", "τ", "κ", "χ", "ν", "θ", "κθ", "μ", "π", "ψ", "β", "λ", "κρ",
550                    "γ", "φθ", "δ", "τ", "κ", "χ", "ν", "φ", "φ", "κ",},
551            new String[]{"λφ", "πλ", "λ", "λ", "κρ", "γχ", "γξ", "ψ"},
552            new String[]{"σ", "π", "τ", "χ", "ν", "μ", "σ", "π", "τ", "χ", "ν", "μ", "β", "γ", "στ", "ρστ",
553                    "ρτ", "σπ", "ρκ", "φ", "ξ", "ζ", "γκ", "γγ", "θ", "δ", "κ", "ν", "ν",},
554            new String[]{"ος", "ος", "ος", "ις", "ις", "υς", "υμ", "ευμ", "ιυμ", "ιαμ", "υς", "υμ", "ες",
555                    "ανες", "ερος", "ορ", "ορ", "οφον", "ον", "ον", "ικον", "οτρον", "ικ",},
556            new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 7, 4, 1}, 0.45, 0.45, 0.0, 0.2, null, true);
557    }
558    /**
559     * Imitation ancient Greek, using the original Greek alphabet. People may try to translate it and get gibberish.
560     * Make sure the font you use to render this supports the Greek alphabet! In the GDX display module, most
561     * fonts support all the Greek you need for this.
562     * <br>
563     * Ψυιλασ αλορ; αιπεομαρτα λε λιασπα...
564     */
565    public static final FakeLanguageGen GREEK_AUTHENTIC = greekAuthentic().register("Greek Authentic");
566
567    private static FakeLanguageGen french(){
568        return new FakeLanguageGen(
569                new String[]{"a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o",
570                        "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o",
571                        "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
572                        "ai", "oi", "oui", "au", "œu", "ou"
573                },
574                new String[]{
575                        "ai", "aie", "aou", "eau", "oi", "oui", "oie", "eu", "eu",
576                        "à", "â", "ai", "aî", "aï", "aie", "aou", "aoû", "au", "ay", "e", "é", "ée", "è",
577                        "ê", "eau", "ei", "eî", "eu", "eû", "i", "î", "ï", "o", "ô", "oe", "oê", "oë", "œu",
578                        "oi", "oie", "oï", "ou", "oû", "oy", "u", "û", "ue",
579                        "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o",
580                        "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
581                        "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o",
582                        "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
583                        "ai", "ai", "eau", "oi", "oi", "oui", "eu", "au", "au", "ei", "ei", "oe", "oe", "ou", "ou", "ue"
584                },
585                new String[]{"tr", "ch", "m", "b", "b", "br", "j", "j", "j", "j", "g", "t", "t", "t", "c", "d", "f", "f", "h", "n", "l", "l",
586                        "s", "s", "s", "r", "r", "r", "v", "v", "p", "pl", "pr", "bl", "br", "dr", "gl", "gr"},
587                new String[]{"cqu", "gu", "qu", "rqu", "nt", "ng", "ngu", "mb", "ll", "nd", "ndr", "nct", "st",
588                        "xt", "mbr", "pl", "g", "gg", "ggr", "gl", "bl", "j", "gn",
589                        "m", "m", "mm", "v", "v", "f", "f", "f", "ff", "b", "b", "bb", "d", "d", "dd", "s", "s", "s", "ss", "ss", "ss",
590                        "cl", "cr", "ng", "ç", "ç", "rç", "rd", "lg", "rg"},
591                new String[]{"rt", "ch", "m", "b", "b", "lb", "t", "t", "t", "t", "c", "d", "f", "f", "n", "n", "l", "l",
592                        "s", "s", "s", "r", "r", "p", "rd", "ff", "ss", "ll"
593                },
594                new String[]{"e", "e", "e", "e", "e", "é", "é", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es", "es",
595                        "e", "e", "e", "e", "e", "é", "é", "er", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es",
596                        "e", "e", "e", "e", "e", "é", "é", "é", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es",
597                        "ent", "em", "en", "en", "aim", "ain", "an", "oin", "ien", "iere", "ors", "anse",
598                        "ombs", "ommes", "ancs", "ends", "œufs", "erfs", "ongs", "aps", "ats", "ives", "ui", "illes",
599                        "aen", "aon", "am", "an", "eun", "ein", "age", "age", "uile", "uin", "um", "un", "un", "un",
600                        "aille", "ouille", "eille", "ille", "eur", "it", "ot", "oi", "oi", "oi", "aire", "om", "on", "on",
601                        "im", "in", "in", "ien", "ien", "ine", "ion", "il", "eil", "oin", "oint", "iguïté", "ience", "incte",
602                        "ang", "ong", "acré", "eau", "ouche", "oux", "oux", "ect", "ecri", "agne", "uer", "aix", "eth", "ut", "ant",
603                        "anc", "anc", "anche", "ioche", "eaux", "ive", "eur", "ancois", "ecois", "ente", "enri",
604                        "arc", "oc", "ouis", "arche", "ique", "ique", "ique", "oque", "arque", "uis", "este", "oir", "oir"
605                },
606                new String[]{}, new int[]{1, 2, 3}, new double[]{15, 7, 2}, 0.35, 1.0, 0.0, 0.4, null, true);
607    }
608    /**
609     * Imitation modern French, using the (many) accented vowels that are present in the language. Translating it
610     * will produce gibberish if it produces anything at all. In the GDX display module, most
611     * fonts support all the accented characters you need for this.
612     * <br>
613     * Bœurter; ubi plaqua se saigui ef brafeur?
614     */
615    public static final FakeLanguageGen FRENCH = french().register("French");
616
617    private static FakeLanguageGen russianRomanized(){
618        return new FakeLanguageGen(
619                new String[]{"a", "e", "e", "i", "i", "o", "u", "ie", "y", "e", "iu", "ia", "y", "a", "a", "o", "u"},
620                new String[]{},
621                new String[]{"b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f", "kh", "ts",
622                        "b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f", "kh", "ts",
623                        "b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f",
624                        "zh", "m", "n", "z", "ch", "sh", "shch",
625                        "br", "sk", "tr", "bl", "gl", "kr", "gr"},
626                new String[]{"bl", "br", "pl", "dzh", "tr", "gl", "gr", "kr"},
627                new String[]{"b", "v", "g", "d", "zh", "z", "k", "l", "m", "n", "p", "r", "s", "t", "f", "kh", "ts", "ch", "sh",
628                        "v", "f", "sk", "sk", "sk", "s", "b", "d", "d", "n", "r", "r"},
629                new String[]{"odka", "odna", "usk", "ask", "usky", "ad", "ar", "ovich", "ev", "ov", "of", "agda", "etsky", "ich", "on", "akh", "iev", "ian"},
630                new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{4, 5, 6, 5, 3, 1}, 0.1, 0.2, 0.0, 0.12, englishSanityChecks, true);
631    }
632    /**
633     * Imitation modern Russian, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers.
634     * <br>
635     * Zhydotuf ruts pitsas, gogutiar shyskuchebab - gichapofeglor giunuz ieskaziuzhin.
636     */
637    public static final FakeLanguageGen RUSSIAN_ROMANIZED = russianRomanized().register("Russian Romanized");
638
639    private static FakeLanguageGen russianAuthentic(){
640        return new FakeLanguageGen(
641                new String[]{"а", "е", "ё", "и", "й", "о", "у", "ъ", "ы", "э", "ю", "я", "ы", "а", "а", "о", "у"},
642                new String[]{},
643                new String[]{"б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф", "х", "ц",
644                        "б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф", "х", "ц",
645                        "б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф",
646                        "ж", "м", "н", "з", "ч", "ш", "щ",
647                        "бр", "ск", "тр", "бл", "гл", "кр", "гр"},
648                new String[]{"бл", "бр", "пл", "дж", "тр", "гл", "гр", "кр"},
649                new String[]{"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н", "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш",
650                        "в", "ф", "ск", "ск", "ск", "с", "б", "д", "д", "н", "р", "р"},
651                new String[]{"одка", "одна", "уск", "аск", "ускы", "ад", "ар", "овйч", "ев", "ов", "оф", "агда", "ёцкы", "йч", "он", "ах", "ъв", "ян"},
652                new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{4, 5, 6, 5, 3, 1}, 0.1, 0.2, 0.0, 0.12, null, true);
653    }
654    /**
655     * Imitation modern Russian, using the authentic Cyrillic alphabet used in Russia and other countries.
656     * Make sure the font you use to render this supports the Cyrillic alphabet!
657     * In the GDX display module, the "smooth" fonts support all the Cyrillic alphabet you need for this.
658     * <br>
659     * Жыдотуф руц пйцас, гогутяр шыскучэбаб - гйчапофёглор гюнуз ъсказюжин.
660     */
661    public static final FakeLanguageGen RUSSIAN_AUTHENTIC = russianAuthentic().register("Russian Authentic");
662
663    private static FakeLanguageGen japaneseRomanized(){
664        return new FakeLanguageGen(
665                new String[]{"a", "a", "a", "a", "e", "e", "i", "i", "i", "i", "o", "o", "o", "u", "ou", "u", "ai", "ai"},
666                new String[]{},
667                new String[]{"k", "ky", "s", "sh", "t", "ts", "ch", "n", "ny", "h", "f", "hy", "m", "my", "y", "r", "ry", "g",
668                        "gy", "z", "j", "d", "b", "by", "p", "py",
669                        "k", "t", "n", "s", "k", "t", "d", "s", "sh", "sh", "g", "r", "b",
670                        "k", "t", "n", "s", "k", "t", "b", "s", "sh", "sh", "g", "r", "b",
671                        "k", "t", "n", "s", "k", "t", "z", "s", "sh", "sh", "ch", "ry", "ts"
672                },
673                new String[]{"k", "ky", "s", "sh", "t", "ts", "ch", "n", "ny", "h", "f", "hy", "m", "my", "y", "r", "ry", "g",
674                        "gy", "z", "j", "d", "b", "by", "p", "py",
675                        "k", "t", "d", "s", "k", "t", "d", "s", "sh", "sh", "y", "j", "p", "r", "d",
676                        "k", "t", "b", "s", "k", "t", "b", "s", "sh", "sh", "y", "j", "p", "r", "d",
677                        "k", "t", "z", "s", "f", "g", "z", "b", "d", "ts", "sh", "m",
678                        "k", "t", "z", "s", "f", "g", "z", "b", "d", "ts", "sh", "m",
679                        "nn", "nn", "nd", "nz", "mm", "kk", "tt", "ss", "ssh", "tch"},
680                new String[]{"n"},
681                new String[]{"ima", "aki", "aka", "ita", "en", "izen", "achi", "uke", "aido", "outsu", "uki", "oku", "aku", "oto", "okyo"},
682                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 4, 5, 4, 3}, 0.3, 0.9, 0.0, 0.07, japaneseSanityChecks, true);
683    }
684    /**
685     * Imitation Japanese, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers.
686     * <br>
687     * Narurehyounan nikase keho...
688     */
689    public static final FakeLanguageGen JAPANESE_ROMANIZED = japaneseRomanized().register("Japanese Romanized");
690
691    private static FakeLanguageGen swahili(){
692        return new FakeLanguageGen(
693                new String[]{"a", "i", "o", "e", "u",
694                        "a", "a", "i", "o", "o", "e", "u",
695                        "a", "a", "i", "o", "o", "u",
696                        "a", "a", "i", "i", "o",
697                        "a", "a", "a", "a", "a",
698                        "a", "i", "o", "e", "u",
699                        "a", "a", "i", "o", "o", "e", "u",
700                        "a", "a", "i", "o", "o", "u",
701                        "a", "a", "i", "i", "o",
702                        "a", "a", "a", "a", "a",
703                        "aa", "aa", "ue", "uo", "ii", "ea"},
704                new String[]{},
705                new String[]{
706                        "b", "h", "j", "l", "s", "y", "m", "n",
707                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
708                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
709                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
710                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
711
712                        "b", "h", "j", "l", "s", "y", "m", "n",
713                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
714                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
715                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
716                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
717
718                        "b", "h", "j", "l", "s", "y", "m", "n",
719                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
720                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
721                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
722                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
723
724                        "b", "h", "j", "l", "s", "y", "m", "n",
725                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
726                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
727                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
728                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
729
730                        "nb", "nj", "ns", "nz",
731                        "nb", "nch", "nj", "ns", "ny", "nz",
732                        "nb", "nch", "nf", "ng", "nj", "nk", "np", "ns", "nz",
733                        "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nz",
734                        "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nv", "nw", "nz",
735
736                        "mb", "ms", "my", "mz",
737                        "mb", "mch", "ms", "my", "mz",
738                        "mb", "mch", "mk", "mp", "ms", "my", "mz",
739                        "mb", "mch", "md", "mk", "mp", "ms", "mt", "my", "mz",
740                        "mb", "mch", "md", "mf", "mg", "mj", "mk", "mp", "ms", "mt", "mv", "mw", "my", "mz",
741                        "sh", "sh", "sh", "ny", "kw",
742                        "dh", "th", "sh", "ny",
743                        "dh", "th", "sh", "gh", "r", "ny",
744                        "dh", "th", "sh", "gh", "r", "ny",
745                },
746                new String[]{
747                        "b", "h", "j", "l", "s", "y", "m", "n",
748                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
749                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
750                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
751                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
752
753                        "b", "h", "j", "l", "s", "y", "m", "n",
754                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
755                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
756                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
757                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
758
759                        "b", "h", "j", "l", "s", "y", "m", "n",
760                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
761                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
762                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
763                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
764
765                        "b", "h", "j", "l", "s", "y", "m", "n",
766                        "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n",
767                        "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n",
768                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw",
769                        "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw",
770
771                        "nb", "nj", "ns", "nz",
772                        "nb", "nch", "nj", "ns", "ny", "nz",
773                        "nb", "nch", "nf", "ng", "nj", "nk", "np", "ns", "nz",
774                        "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nz",
775                        "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nw", "nz",
776
777                        "mb", "ms", "my", "mz",
778                        "mb", "mch", "ms", "my", "mz",
779                        "mb", "mch", "mk", "mp", "ms", "my", "mz",
780                        "mb", "mch", "md", "mk", "mp", "ms", "mt", "my", "mz",
781                        "mb", "mch", "md", "mf", "mg", "mj", "mk", "mp", "ms", "mt", "mw", "my", "mz",
782                        "sh", "sh", "sh", "ny", "kw",
783                        "dh", "th", "sh", "ny",
784                        "dh", "th", "sh", "gh", "r", "ny",
785                        "dh", "th", "sh", "gh", "r", "ny",
786                        "ng", "ng", "ng", "ng", "ng"
787                },
788                new String[]{""},
789                new String[]{"-@"},
790                new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{3, 8, 6, 9, 2, 2}, 0.2, 1.0, 0.0, 0.12, null, true);
791    }
792    /**
793     * Swahili is one of the more commonly-spoken languages in sub-Saharan Africa, and serves mainly as a shared language
794     * that is often learned after becoming fluent in one of many other (vaguely-similar) languages of the area. An
795     * example sentence in Swahili, that this might try to imitate aesthetically, is "Mtoto mdogo amekisoma," meaning
796     * "The small child reads it" (where it is a book). A notable language feature used here is the redoubling of words,
797     * which is used in Swahili to emphasize or alter the meaning of the doubled word; here, it always repeats exactly
798     * and can't make minor changes like a real language might. This generates things like "gata-gata", "hapi-hapi", and
799     * "mimamzu-mimamzu", always separating with a hyphen here.
800     * <br>
801     * As an aside, please try to avoid the ugly stereotypes that fantasy media often assigns to speakers of African-like
802     * languages when using this or any of the generators. Many fantasy tropes come from older literature written with
803     * major cultural biases, and real-world cultural elements can be much more interesting to players than yet another
804     * depiction of a "jungle savage" with stereotypical traits. Consider drawing from existing lists of real-world
805     * technological discoveries, like https://en.wikipedia.org/wiki/History_of_science_and_technology_in_Africa , for
806     * inspiration when world-building; though some groups may not have developed agriculture by early medieval times,
807     * their neighbors may be working iron and studying astronomy just a short distance away.
808     * <br>
809     * Kondueyu; ma mpiyamdabota mise-mise nizakwaja alamsa amja, homa nkajupomba.
810     */
811    public static final FakeLanguageGen SWAHILI = swahili().register("Swahili");
812
813    private static FakeLanguageGen somali(){
814        return new FakeLanguageGen(
815                new String[]{"a", "a", "a", "a", "a", "a", "a", "aa", "aa", "aa",
816                        "e", "e", "ee",
817                        "i", "i", "i", "i", "ii",
818                        "o", "o", "o", "oo",
819                        "u", "u", "u", "uu", "uu",
820                },
821                new String[]{},
822                new String[]{"b", "t", "j", "x", "kh", "d", "r", "s", "sh", "dh", "c", "g", "f", "q", "k", "l", "m",
823                        "n", "w", "h", "y",
824                        "x", "g", "b", "d", "s", "m", "dh", "n", "r",
825                        "g", "b", "s", "dh",
826                },
827                new String[]{
828                        "bb", "gg", "dd", "bb", "dd", "rr", "ddh", "cc", "gg", "ff", "ll", "mm", "nn",
829                        "bb", "gg", "dd", "bb", "dd", "gg",
830                        "bb", "gg", "dd", "bb", "dd", "gg",
831                        "cy", "fk", "ft", "nt", "rt", "lt", "qm", "rdh", "rsh", "lq",
832                        "my", "gy", "by", "lkh", "rx", "md", "bd", "dg", "fd", "mf",
833                        "dh", "dh", "dh", "dh",
834                },
835                new String[]{
836                        "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n", "h",
837                        "x", "g", "b", "d", "s", "m", "q", "n", "r",
838                        "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n", "h",
839                        "x", "g", "b", "d", "s", "m", "q", "n", "r",
840                        "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n",
841                        "g", "b", "d", "s", "q", "n", "r",
842                        "b", "t", "x", "kh", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n",
843                        "g", "b", "d", "s", "r", "n",
844                        "b", "t", "kh", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n",
845                        "g", "b", "d", "s", "r", "n",
846                        "b", "t", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n",
847                        "g", "b", "d", "s", "r", "n",
848                },
849                new String[]{"aw", "ow", "ay", "ey", "oy", "ay", "ay"},
850                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 4, 5, 4, 1}, 0.25, 0.3, 0.0, 0.08, null, true);
851    }
852    /**
853     * Imitation Somali, using the Latin alphabet. Due to uncommon word structure, unusual allowed combinations of
854     * letters, and no common word roots with most familiar languages, this may seem like an unidentifiable or "alien"
855     * language to most readers. However, it's based on the Latin writing system for the Somali language (probably
856     * closest to the northern dialect), which due to the previously mentioned properties, makes it especially good for
857     * mixing with other languages to make letter combinations that seem strange to appear. It is unlikely that this
858     * particular generated language style will be familiar to readers, so it probably won't have existing stereotypes
859     * associated with the text. One early comment this received was, "it looks like a bunch of letters semi-randomly
860     * thrown together", which is probably a typical response (the comment was made by someone fluent in German and
861     * English, and most Western European languages are about as far as you can get from Somali).
862     * <br>
863     * Libor cat naqoxekh dhuugad gisiqir?
864     */
865    public static final FakeLanguageGen SOMALI = somali().register("Somali");
866    private static FakeLanguageGen hindi(){
867        return new FakeLanguageGen(
868                new String[]{
869                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī",
870                        "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
871                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī",
872                        "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
873                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī",
874                        "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
875                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī",
876                        "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
877                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī",
878                        "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
879                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī",
880                        "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
881                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī",
882                        "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
883                        "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī",
884                        "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au",
885                        "aĕ", "aĕ", "aĕ", "aĕ", "aĕ", "āĕ", "āĕ", "iĕ", "iĕ", "iĕ", "īĕ", "īĕ",
886                        "uĕ", "uĕ", "ūĕ", "aiĕ", "aiĕ", "oĕ", "oĕ", "oĕ", "auĕ",
887                        //"aĭ", "aĭ", "aĭ", "aĭ", "aĭ", "āĭ", "āĭ", "iĭ", "iĭ", "iĭ", "īĭ", "īĭ",
888                        //"uĭ", "uĭ", "ūĭ", "aiĭ", "aiĭ", "oĭ", "oĭ", "oĭ", "auĭ",
889                },
890                new String[]{"á", "í", "ú", "ó", "á", "í", "ú", "ó",
891                },
892                new String[]{
893                        "k", "k", "k", "k", "k", "k", "k", "k", "kŗ", "kŕ", "kļ",
894                        "c", "c", "c", "c", "c", "c", "cŗ", "cŕ", "cļ",
895                        "ţ", "t", "t", "t", "t", "t", "t", "t", "t", "t", "tŗ", "tŕ", "tŗ", "tŕ",
896                        "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "pŗ", "pŕ", "pļ", "pĺ", "pŗ", "pŕ", "p", "p",
897                        "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "khŗ", "khŕ", "khļ", "khĺ",
898                        "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "chŗ", "chŕ", "chļ", "chĺ",
899                        "ţh", "th", "th", "th", "th", "th", "th", "th", "th", "th", "thŗ", "thŕ", "thļ", "thĺ",
900                        "ph", "ph", "ph", "ph", "ph", "ph", "ph", "phŗ", "phŕ", "phļ", "phĺ",
901                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
902                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
903                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
904                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
905                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
906                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
907                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
908                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
909                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
910                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
911                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
912                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
913                        "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh",
914                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
915                        "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh",
916                        "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s",
917                        "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh",
918                        "ń", "ņ", "n", "m", "h", "y", "r", "l", "v", "ş", "s",
919                        "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh",
920                        "ń", "ņ", "n", "m", "h", "y", "r", "l", "v", "ş", "s",
921                        "g", "đ", "d", "b", "gh", "đh", "dh", "bh", "n", "m", "v", "s",
922                        "g", "đ", "d", "b", "g", "d", "b", "dh", "bh", "n", "m", "v",
923                        "g", "đ", "d", "b", "g", "d", "b", "dh", "bh", "n", "m", "v",
924                },
925                new String[]{
926                        "k", "k", "k", "k", "k", "nk", "rk",
927                        "k", "k", "k", "k", "k", "nk", "rk",
928                        "k", "k", "k", "k", "k", "nk", "rk",
929                        "k", "k", "k", "k", "k", "nk", "rk",
930                        "k", "k", "k", "k", "k", "nk", "rk",
931                        "k", "k", "k", "k", "k", "nk", "rk",
932                        "k", "k", "k", "k", "k", "nk", "rk",
933                        "k", "k", "k", "k", "k", "nk", "rk",
934                        "kŗ", "kŗ", "kŗ", "kŗ", "kŗ", "nkŗ", "rkŗ",
935                        "kŕ", "kŕ", "kŕ", "kŕ", "kŕ", "nkŕ", "rkŕ",
936                        "kļ", "kļ", "kļ", "kļ", "kļ", "nkļ", "rkļ",
937
938                        "c", "c", "c", "c", "c", "c", "cŗ", "cŕ", "cļ",
939                        "ţ", "t", "t", "t", "t", "t", "nt", "rt",
940                        "ţ", "t", "t", "t", "t", "nt", "rt",
941                        "ţ", "t", "t", "t", "t", "nt", "rt",
942                        "ţ", "t", "t", "t", "t", "nt", "rt",
943                        "ţ", "t", "t", "t", "t", "nt", "rt",
944                        "ţ", "t", "t", "t", "t", "nt", "rt",
945                        "ţ", "t", "t", "t", "t", "nt", "rt",
946                        "ţ", "t", "t", "t", "t", "nt", "rt",
947                        "ţ", "t", "t", "t", "t", "nt", "rt",
948                        "tŗ", "tŗ", "tŗ", "tŗ", "tŗ", "ntŗ", "rtŗ",
949                        "tŕ", "tŕ", "tŕ", "tŕ", "tŕ", "ntŕ", "rtŕ",
950                        "tŗ", "tŗ", "tŗ", "tŗ", "tŗ", "ntŗ", "rtŗ",
951                        "tŕ", "tŕ", "tŕ", "tŕ", "tŕ", "ntŕ", "rtŕ",
952
953                        "p", "p", "p", "p", "p", "np", "rp",
954                        "p", "p", "p", "p", "p", "np", "rp",
955                        "p", "p", "p", "p", "p", "np", "rp",
956                        "p", "p", "p", "p", "p", "np", "rp",
957                        "p", "p", "p", "p", "p", "np", "rp",
958                        "p", "p", "p", "p", "p", "np", "rp",
959                        "p", "p", "p", "p", "p", "np", "rp",
960                        "p", "p", "p", "p", "p", "np", "rp",
961                        "p", "p", "p", "p", "p", "np", "rp",
962                        "p", "p", "p", "p", "p", "np", "rp",
963                        "pŗ", "pŗ", "pŗ", "pŗ", "pŗ", "npŗ", "rpŗ",
964                        "pŕ", "pŕ", "pŕ", "pŕ", "pŕ", "npŕ", "rpŕ",
965                        "pļ", "pļ", "pļ", "pļ", "pļ", "npļ", "rpļ",
966                        "pĺ", "pĺ", "pĺ", "pĺ", "pĺ", "npĺ", "rpĺ",
967                        "pŗ", "pŗ", "pŗ", "pŗ", "pŗ", "npŗ", "rpŗ",
968                        "pŕ", "pŕ", "pŕ", "pŕ", "pŕ", "npŕ", "rpŕ",
969                        "p", "p", "p", "p", "p", "np", "rp",
970                        "p", "p", "p", "p", "p", "np", "rp",
971
972                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
973                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
974                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
975                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
976                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
977                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
978                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
979                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
980                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
981                        "kh", "kh", "kh", "kh", "kh", "nkh", "rkh",
982                        "khŗ", "khŗ", "khŗ", "khŗ", "khŗ", "nkhŗ", "rkhŗ",
983                        "khŕ", "khŕ", "khŕ", "khŕ", "khŕ", "nkhŕ", "rkhŕ",
984                        "khļ", "khļ", "khļ", "khļ", "khļ", "nkhļ", "rkhļ",
985                        "khĺ", "khĺ", "khĺ", "khĺ", "khĺ", "nkhĺ", "rkhĺ",
986
987                        "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "chŗ", "chŕ", "chļ", "chĺ",
988                        "ţh", "th", "th", "th", "th", "th", "nth", "rth",
989                        "th", "th", "th", "th", "th", "nth", "rth",
990                        "th", "th", "th", "th", "th", "nth", "rth",
991                        "th", "th", "th", "th", "th", "nth", "rth",
992                        "th", "th", "th", "th", "th", "nth", "rth",
993                        "th", "th", "th", "th", "th", "nth", "rth",
994                        "th", "th", "th", "th", "th", "nth", "rth",
995                        "th", "th", "th", "th", "th", "nth", "rth",
996                        "th", "th", "th", "th", "th", "nth", "rth",
997                        "thŗ", "thŗ", "thŗ", "thŗ", "thŗ", "nthŗ", "rthŗ",
998                        "thŕ", "thŕ", "thŕ", "thŕ", "thŕ", "nthŕ", "rthŕ",
999                        "thļ", "thļ", "thļ", "thļ", "thļ", "nthļ", "rthļ",
1000                        "thĺ", "thĺ", "thĺ", "thĺ", "thĺ", "nthĺ", "rthĺ",
1001
1002                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1003                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1004                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1005                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1006                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1007                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1008                        "ph", "ph", "ph", "ph", "ph", "nph", "rph",
1009                        "phŗ", "phŗ", "phŗ", "phŗ", "phŗ", "nphŗ", "rphŗ",
1010                        "phŕ", "phŕ", "phŕ", "phŕ", "phŕ", "nphŕ", "rphŕ",
1011                        "phļ", "phļ", "phļ", "phļ", "phļ", "nphļ", "rphļ",
1012                        "phĺ", "phĺ", "phĺ", "phĺ", "phĺ", "nphĺ", "rphĺ",
1013
1014                        "g", "g", "g", "g", "g", "ng", "rg",
1015                        "j", "j", "j", "j", "j", "nj", "rj",
1016                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1017                        "d", "d", "d", "d", "d", "nd", "rd",
1018                        "b", "b", "b", "b", "b", "nb", "rb",
1019                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1020                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1021                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1022                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1023
1024                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1025                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1026                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1027                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1028                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1029                        "s", "s", "s", "s", "s", "ns", "rs",
1030
1031                        "g", "g", "g", "g", "g", "ng", "rg",
1032                        "j", "j", "j", "j", "j", "nj", "rj",
1033                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1034                        "d", "d", "d", "d", "d", "nd", "rd",
1035                        "b", "b", "b", "b", "b", "nb", "rb",
1036                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1037                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1038                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1039                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1040
1041                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1042                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1043                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1044                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1045                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1046                        "s", "s", "s", "s", "s", "ns", "rs",
1047
1048                        "g", "g", "g", "g", "g", "ng", "rg",
1049                        "j", "j", "j", "j", "j", "nj", "rj",
1050                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1051                        "d", "d", "d", "d", "d", "nd", "rd",
1052                        "b", "b", "b", "b", "b", "nb", "rb",
1053                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1054                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1055                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1056                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1057
1058                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1059                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1060                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1061                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1062                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1063                        "s", "s", "s", "s", "s", "ns", "rs",
1064
1065                        "g", "g", "g", "g", "g", "ng", "rg",
1066                        "j", "j", "j", "j", "j", "nj", "rj",
1067                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1068                        "d", "d", "d", "d", "d", "nd", "rd",
1069                        "b", "b", "b", "b", "b", "nb", "rb",
1070                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1071                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1072                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1073                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1074
1075                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1076                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1077                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1078                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1079                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1080                        "s", "s", "s", "s", "s", "ns", "rs",
1081
1082                        "g", "g", "g", "g", "g", "ng", "rg",
1083                        "j", "j", "j", "j", "j", "nj", "rj",
1084                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1085                        "d", "d", "d", "d", "d", "nd", "rd",
1086                        "b", "b", "b", "b", "b", "nb", "rb",
1087                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1088                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1089                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1090                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1091
1092                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1093                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1094                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1095                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1096                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1097                        "s", "s", "s", "s", "s", "ns", "rs",
1098
1099                        "g", "g", "g", "g", "g", "ng", "rg",
1100                        "j", "j", "j", "j", "j", "nj", "rj",
1101                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1102                        "d", "d", "d", "d", "d", "nd", "rd",
1103                        "b", "b", "b", "b", "b", "nb", "rb",
1104                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1105                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1106                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1107                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1108
1109                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1110                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1111                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1112                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1113                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1114                        "s", "s", "s", "s", "s", "ns", "rs",
1115
1116                        "g", "g", "g", "g", "g", "ng", "rg",
1117                        "j", "j", "j", "j", "j", "nj", "rj",
1118                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1119                        "d", "d", "d", "d", "d", "nd", "rd",
1120                        "b", "b", "b", "b", "b", "nb", "rb",
1121                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1122                        "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1123                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1124                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1125
1126                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1127                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1128                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1129                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1130                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1131                        "s", "s", "s", "s", "s", "ns", "rs",
1132
1133                        "g", "g", "g", "g", "g", "ng", "rg",
1134                        "j", "j", "j", "j", "j", "nj", "rj",
1135                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1136                        "d", "d", "d", "d", "d", "nd", "rd",
1137                        "b", "b", "b", "b", "b", "nb", "rb",
1138                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1139                        "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1140                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1141                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1142
1143                        "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1144                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1145                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1146                        "ś", "ś", "ś", "ś", "ś", "nś", "rś",
1147                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1148                        "s", "s", "s", "s", "s", "ns", "rs",
1149
1150                        "g", "g", "g", "g", "g", "ng", "rg",
1151                        "j", "j", "j", "j", "j", "nj", "rj",
1152                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1153                        "d", "d", "d", "d", "d", "nd", "rd",
1154                        "b", "b", "b", "b", "b", "nb", "rb",
1155                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1156                        "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1157                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1158                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1159
1160                        "ń", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1161                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1162                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1163                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1164                        "s", "s", "s", "s", "s", "ns", "rs",
1165
1166                        "g", "g", "g", "g", "g", "ng", "rg",
1167                        "j", "j", "j", "j", "j", "nj", "rj",
1168                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1169                        "d", "d", "d", "d", "d", "nd", "rd",
1170                        "b", "b", "b", "b", "b", "nb", "rb",
1171                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1172                        "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1173                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1174                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1175
1176                        "ń", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm",
1177                        "h", "y", "y", "y", "y", "y", "ny", "ry",
1178                        "r", "l", "v", "v", "v", "v", "v", "nv", "rv",
1179                        "ş", "ş", "ş", "ş", "ş", "nş", "rş",
1180                        "s", "s", "s", "s", "s", "ns", "rs",
1181
1182                        "g", "g", "g", "g", "g", "ng", "rg",
1183                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1184                        "d", "d", "d", "d", "d", "nd", "rd",
1185                        "b", "b", "b", "b", "b", "nb", "rb",
1186                        "gh", "gh", "gh", "gh", "gh", "ngh", "rgh",
1187                        "đh", "đh", "đh", "đh", "đh", "nđh", "rđh",
1188                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1189                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1190                        "n", "m", "m", "m", "m", "m", "nm", "rm",
1191                        "v", "v", "v", "v", "v", "nv", "rv",
1192                        "s", "s", "s", "s", "s", "ns", "rs",
1193
1194                        "g", "g", "g", "g", "g", "ng", "rg",
1195                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1196                        "d", "d", "d", "d", "d", "nd", "rd",
1197                        "b", "b", "b", "b", "b", "nb", "rb",
1198                        "g", "g", "g", "g", "g", "ng", "rg",
1199                        "d", "d", "d", "d", "d", "nd", "rd",
1200                        "b", "b", "b", "b", "b", "nb", "rb",
1201                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1202                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1203                        "n", "m", "m", "m", "m", "m", "nm", "rm",
1204                        "v", "v", "v", "v", "v", "nv", "rv",
1205
1206                        "g", "g", "g", "g", "g", "ng", "rg",
1207                        "đ", "đ", "đ", "đ", "đ", "nđ", "rđ",
1208                        "d", "d", "d", "d", "d", "nd", "rd",
1209                        "b", "b", "b", "b", "b", "nb", "rb",
1210                        "g", "g", "g", "g", "g", "ng", "rg",
1211                        "d", "d", "d", "d", "d", "nd", "rd",
1212                        "b", "b", "b", "b", "b", "nb", "rb",
1213                        "dh", "dh", "dh", "dh", "dh", "ndh", "rdh",
1214                        "bh", "bh", "bh", "bh", "bh", "nbh", "rbh",
1215                        "n", "m", "m", "m", "m", "m", "nm", "rm",
1216                        "v", "v", "v", "v", "v", "nv", "rv",
1217                },
1218                new String[]{"t", "d", "m", "r", "dh", "b", "t", "d", "m", "r", "dh", "bh", "nt", "nt", "nk", "ş"},
1219                new String[]{"it", "it", "ati", "adva", "aş", "arma", "ardha", "abi", "ab", "aya"},
1220                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{1, 2, 3, 3, 1}, 0.15, 0.75, 0.0, 0.12, null, true);
1221    }
    /**
     * Imitation Hindi, romanized into the Latin alphabet with accented glyphs similar to those of the IAST standard.
     * Most fonts do not support the glyphs that IAST-standard romanization of Hindi needs, so this uses alternate
     * glyphs from at most Latin Extended-A. Relative to the IAST standard, the glyphs {@code "ṛṝḷḹḍṭṅṇṣṃḥ"} become
     * {@code "ŗŕļĺđţńņşĕĭ"}, with the nth glyph in the first string being substituted with the nth glyph in the second
     * string. You may want to get a variant on this language with {@link #removeAccents()} if you can't display the
     * less-commonly-supported glyphs {@code āīūĕĭáíúóŗŕļţĺđńñņśş}. For some time SquidLib had a separate version of
     * imitation Hindi that was accurate to the IAST standard, but this version is more usable because font support is
     * much better for the glyphs it uses, so the IAST kind was removed (it added quite a bit of code for something that
     * was mostly unusable).
     * <br>
     * This constant is the result of the private {@code hindi()} factory, registered under the name
     * {@code "Hindi Romanized"}.
     * <br>
     * Darvāga yar; ghađhinopŕauka āĕrdur, conśaigaijo śabhodhaĕđū jiviđaudu.
     */
    public static final FakeLanguageGen HINDI_ROMANIZED = hindi().register("Hindi Romanized");
1236
1237    private static FakeLanguageGen arabic(){
1238        return new FakeLanguageGen(
1239                new String[]{"a", "a", "a", "a", "a", "a", "aa", "aa", "aa", "ai", "au",
1240                        "a", "i", "u", "a", "i", "u",
1241                        "i", "i", "i", "i", "i", "ii", "ii", "ii",
1242                        "u", "u", "u", "uu", "uu",
1243                },
1244                new String[]{},
1245                new String[]{"gh", "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw",
1246                        "zh", "khm", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1247                        "q", "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r",
1248                        "q", "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1249                        "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1250                        "al-", "al-", "ibn-",
1251                },
1252                new String[]{
1253                        "kk", "kk", "kk", "kk", "kk", "dd", "dd", "dd", "dd",
1254                        "nj", "mj", "bj", "mj", "bj", "mj", "bj", "dj", "dtj", "dhj",
1255                        "nz", "nzh", "mz", "mzh", "rz", "rzh", "bz", "dz", "tz",
1256                        "s-h", "sh-h", "shw-h", "tw", "bn", "fq", "hz", "hl", "khm",
1257                        "lb", "lz", "lj", "lf", "ll", "lk", "lq", "lg", "ln"
1258                },
1259                new String[]{
1260                        "gh", "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw", "dt", "jj",
1261                        "zh", "khm", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1262                        "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r",
1263                        "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1264                        "k", "f", "l", "z", "h", "h", "j", "s", "r",
1265                        "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw", "dt", "jj",
1266                        "zh", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1267                        "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r",
1268                        "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1269                        "k", "f", "l", "z", "h", "h", "j", "s", "r",
1270                },
1271                new String[]{"aagh", "aagh", "ari", "ari", "aiid", "uuq", "ariid", "adih", "ateh", "adesh", "amiit", "it",
1272                        "iit", "akhmen", "akhmed", "ani", "abiib", "iib", "uuni", "iiz", "aqarii", "adiiq",
1273                },
1274                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 5, 5, 1}, 0.55, 0.65, 0.0, 0.15, arabicSanityChecks, true);
1275    }
    /**
     * Imitation Arabic, written with only ASCII letters and hyphens; digraphs and trigraphs such as "kh", "dh", and
     * "shw" stand in for sounds that are tough to transliterate.
     * It's hard to think of a more different (widely-spoken) language to romanize than Arabic. Written Arabic does not
     * ordinarily use vowels (the writing system is called an abjad, in contrast to an alphabet), and it has more than a
     * few sounds that are very different from those in English. This version, because of limited support in fonts and
     * the need for separate words to be distinguishable with regular expressions, uses somewhat-accurate digraphs or
     * trigraphs instead of the many accented glyphs (not necessarily supported by most fonts) in most romanizations of
     * Arabic, and this scheme uses no characters from outside ASCII.
     * <br>
     * Please try to be culturally-sensitive about how you use this generator. Classical Arabic (the variant that
     * normally marks vowels explicitly and is used to write the Qur'an) has deep religious significance in Islam, and
     * if you machine-generate text that (probably) isn't valid Arabic, but claim that it is real, or that it has
     * meaning when it actually doesn't, that would be an improper usage of what this generator is meant to do. In a
     * fantasy setting, you can easily confirm that the language is fictional and any overlap is coincidental; an
     * example of imitation Arabic in use is the Dungeons and Dragons setting, Al-Qadim, which according to one account
     * sounds similar to a word in real Arabic (that does not mean anything like what the designer was aiming for). In a
     * historical setting, FakeLanguageGen is probably "too fake" to make a viable imitation for any language, and may
     * just sound insulting if portrayed as realistic. You may want to mix ARABIC_ROMANIZED with a very different kind
     * of language, like GREEK_ROMANIZED or RUSSIAN_AUTHENTIC, to emphasize that this is not a real-world language.
     * <br>
     * This constant is the result of the private {@code arabic()} factory, registered under the name
     * {@code "Arabic Romanized"}.
     * <br>
     * Hiijakki al-aafusiib rihit, ibn-ullukh aj shwisari!
     */
    public static final FakeLanguageGen ARABIC_ROMANIZED = arabic().register("Arabic Romanized");
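    /*
    Disabled alternative definition of ARABIC_ROMANIZED that spells its sounds with accented Latin glyphs and the
    Greek letters δ and ξ rather than the ASCII digraphs and trigraphs used by arabic() above. This block is
    commented out and is not compiled.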
1300    public static final FakeLanguageGen ARABIC_ROMANIZED = new FakeLanguageGen(
1301            new String[]{"a", "a", "a", "a", "a", "a", "ā", "ā", "ā", "ai", "au",
1302                    "a", "i", "u", "a", "i", "u",
1303                    "i", "i", "i", "i", "i", "ī", "ī", "ī",
1304                    "u", "u", "u", "ū", "ū",
1305            },
1306            new String[]{},
1307            new String[]{"δ", "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", "s", "š", "ş", "ď", "ť",
1308                    "ż", "ξ", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1309                    "q", "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r",
1310                    "q", "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1311                    "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1312                    "al-", "al-", "ibn-",
1313            },
1314            new String[]{
1315                    "kk", "kk", "kk", "kk", "kk", "dd", "dd", "dd", "dd",
1316                    "nj", "mj", "bj", "mj", "bj", "mj", "bj", "dj", "ďj", "đj",
1317                    "nz", "nż", "mz", "mż", "rz", "rż", "bz", "dz", "tz",
1318                    "s-h", "š-h", "ş-h", "tw", "bn", "fq", "hz", "hl", "ĥm",
1319                    "lb", "lz", "lj", "lf", "ll", "lk", "lq", "lg", "ln"
1320            },
1321            new String[]{
1322                    "δ", "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", "s", "š", "ş", "ď", "ť",
1323                    "ż", "ξ", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1324                    "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r",
1325                    "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1326                    "k", "f", "l", "z", "h", "h", "j", "s", "r",
1327                    "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", "s", "š", "ş", "ď", "ť",
1328                    "ż", "g", "f", "q", "k", "l", "m", "n", "h", "w",
1329                    "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r",
1330                    "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r",
1331                    "k", "f", "l", "z", "h", "h", "j", "s", "r",
1332            },
1333            new String[]{"āδ", "āδ", "ari", "ari", "aīd", "ūq", "arīd", "adih", "ateh", "adeš", "amīt", "it",
1334                    "īt", "aĥmen", "aĥmed", "ani", "abīb", "īb", "ūni", "īz", "aqarī", "adīq",
1335            },
1336            new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 5, 5, 1}, 0.55, 0.65, 0.0, 0.15, arabicSanityChecks, true);
1337            */
1338
1339    private static FakeLanguageGen inuktitut(){
1340        return new FakeLanguageGen(
1341                new String[]{"a", "a", "a", "a", "a", "aa", "aa", "aa", "aa", "i", "i", "i", "ii", "ii", "u", "u", "u", "uu", "uu", "ai", "ia", "iu", "ua", "ui"},
1342                new String[]{},
1343                new String[]{"p", "t", "k", "q", "s", "l", "h", "v", "j", "g", "r", "m", "n",
1344                        "t", "t", "t", "t", "k", "k", "q", "q", "n", "n", "n", "n", "g", "l"},
1345                new String[]{"pp", "tt", "kk", "pk", "tk", "gk", "kp", "kt", "kg", "pq", "tq", "gq", "ss", "ll", "rr", "mm",
1346                        "nn", "nng", "ng", "ng",
1347                        "ll", "nn", "nn", "nn",},
1348                new String[]{"n", "t", "q", "k", "n", "t", "q", "k", "n", "t", "q", "k", "n", "t", "q", "k", "p", "s", "m", "g", "g", "ng", "ng", "ng"},
1349                new String[]{"itut", "uit", "uq", "iuq", "iaq", "aq", "it", "aat", "aak", "aan", "ait", "ik", "uut", "un", "unnun",
1350                        "ung", "ang", "ing", "iin", "iit", "iik", "in",
1351                        "uq", "iaq", "aq", "ik", "it", "uit", "ut", "ut", "at", "un", "in"
1352                },
1353                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 4, 6, 5, 4}, 0.45, 0.0, 0.0, 0.25, null, true);
1354    }
    /**
     * Imitation text from an approximation of one of the Inuktitut languages spoken by various peoples of the Arctic
     * and nearby areas. This is likely to be hard to pronounce. Inuktitut is the name accepted in Canada for one
     * language family of that area, but other parts of the Arctic circle speak languages with varying levels of
     * difference from this style of generated text. The term "Inuit language" may be acceptable, but "Eskimo language"
     * is probably not, and when that term is not considered outright offensive it refers to a different language group
     * anyway (more properly called Yupik or Yup'ik, and primarily spoken in Siberia instead of Canada and Alaska).
     * <br>
     * This constant is the result of the private {@code inuktitut()} factory, registered under the name
     * {@code "Inuktitut"}.
     * <br>
     * Ugkangungait ninaaq ipkutuilluuq um aitqiinnaitunniak tillingaat.
     */
    public static final FakeLanguageGen INUKTITUT = inuktitut().register("Inuktitut");
1366
1367    private static FakeLanguageGen norse(){
1368        return new FakeLanguageGen(
1369                new String[]{"a","a","a","á","á","au","e","e","e","é","é","ei","ey","i","i","í","í","y","y","ý","ý",
1370                        "o","o","o","ó","ó","u","u","u","ú","ú","æ","æ","æ","ö","ö",},
1371                new String[]{},
1372                new String[]{"b","bl","br","bj","d","dr","dj","ð","ðl","ðr","f","fl","flj","fr","fn","fj","g","gn","gj","h",
1373                        "hj","hl","hr","hv","j","k","kl","kr","kn","kj","l","lj","m","mj","n","nj","p","pl","pr","pj","r",
1374                        "rj","s","sj","sl","sn","sp","st","str","skr","skj","sþ","sð","t","tj","v","vl","vr","vj","þ","þl","þr",
1375
1376                        "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð",
1377                        "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð",
1378                        "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð",
1379                        "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð",
1380                        "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð",
1381
1382                        "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t",
1383                        "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t",
1384                        "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t",
1385                        "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t",
1386                },
1387                new String[]{"bd","bf","bg","bk","bl","bp","br","bt","bv","bm","bn","bð","bj",
1388                        "db","df","dg","dk","dl","dp","dr","dt","dv","dm","dn","dð","dþ","dj","ndk","ndb","ndg","ndl","nds","nds",
1389                        "ðl","ðr","ðk","ðj","ðg","ðd","ðb","ðp","ðs",
1390                        "fb","fd","fg","fk","fl","fp","fr","fs","ft","fv","fm","fn","fð","fj",
1391                        "gb","gd","gf","gk","gl","gp","gr","gt","gv","gm","gn","gð","gj",
1392                        "h","hj","hl","hr","hv",
1393                        "kb","kd","kf","kp","kv","km","kn","kð","kl","kr","nkj","nkr","nkl",
1394                        "lbr","ldr","lfr","lg","lgr","lj","lkr","ln","ls","ltr","lv","lð","lðr","lþ",
1395                        "mb","md","mk","mg","ml","mp","mr","ms","mt","mv","mð","mþ","mj",
1396                        "nb","nl","np","nr","nv","nð","nþ","nj",
1397                        "ngl","ngb","ngd","ngk","ngp","ngt","ngv","ngm","ngð","ngþ","ngr",
1398                        "mbd","mbg","mbs","mbt","ldg","ldn","ldk","lds","rðn","rðl","gðs","gðr",
1399                        "pb","pd","pg","pk","pl","pr","ps","psj","pð","pj",
1400                        "rl","rbr","rdr","rg","rgr","rkr","rpr","rs","rts","rtr","rv","rj",
1401                        "sb","sbr","sd","sdr","sf","sfj","sg","skr","skl","sm","sn","str","sv","sð","sþ","sj",
1402                        "tr","tn","tb","td","tg","tv","tf","tj","tk","tm","tp",},
1403                new String[]{"kk","ll","nn","pp","tt","kk","ll","nn","pp","tt",
1404                        "bs","ds","gs","x","rn","gn","gt","gs","ks","kt","nt","nd","nk","nt","ng","ngs","ns",
1405                        "ps","pk","pt","pts","lb","ld","lf","lk","lm","lp","lps","lt",
1406                        "rn","rb","rd","rk","rp","rt","rm","rð","rþ","sk","sp","st","ts",
1407                        "b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ",
1408                        "b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ",
1409                        "b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ",
1410
1411                        "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t",
1412                        "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t",
1413                        "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t",
1414                },
1415                new String[]{"etta","eþa","uinn","ing","ard","eign","ef","efs","eg","ir","ir","ir","ir","ír","ír","ar","ar",
1416                        "ar","ár","or","or","ór","ör","on","on","ón","onn","unn","ung","ut","ett","att","ot"},
1417                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 5, 4, 3, 1}, 0.25, 0.5, 0.0, 0.08, genericSanityChecks, true);
1418    }
1419    /**
1420     * Somewhat close to Old Norse, which is itself very close to Icelandic, so this uses Icelandic spelling rules. Not
1421     * to be confused with the language(s) of Norway, where the Norwegian languages are called norsk, and are further
1422     * distinguished into Bokmål and Nynorsk. This should not be likely to seem like any form of Norwegian, since it
1423     * doesn't have the a-with-ring letter 'å' and has the letters eth ('Ðð') and thorn ('Þþ'). If you want to remove
1424     * any letters not present on a US-ASCII keyboard, you can use {@link Modifier#SIMPLIFY_NORSE} on this language or
1425     * some mix of this with other languages; it also changes some of the usage of "j" where it means the English "y"
1426     * sound, making "fjord" into "fyord", which is closer to familiar uses from East Asia like "Tokyo" and "Pyongyang".
1427     * You can also now use {@link #NORSE_SIMPLIFIED} directly, which is probably easiest.
1428     * <br>
1429     * Leyrk tjör stomri kna snó æd ðrépdápá, prygso?
1430     */
1431    public static final FakeLanguageGen NORSE = norse().register("Norse");
1432
1433    private static FakeLanguageGen nahuatl(){
1434        return new FakeLanguageGen(
1435                new String[]{"a", "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "i", "o", "o", "o", "e", "e", "eo", "oa", "ea"},
1436                new String[]{},
1437                new String[]{"ch", "c", "h", "m", "l", "n", "p", "t", "tl", "tz", "x", "y", "z", "hu", "cu",
1438                        "l", "l", "l", "p", "p", "t", "t", "t", "t", "t", "tl", "tl", "tz", "z", "x", "hu"},
1439                new String[]{"zp", "ztl", "zc", "zt", "zl", "ct", "cl", "pl", "mt", "mc", "mch", "cz", "tc", "lc",
1440                        "hu", "hu", "hu", "cu"},
1441                new String[]{
1442                        "ch", "c", "h", "m", "l", "n", "p", "t", "tl", "tz", "x", "y", "z",
1443                        "l", "l", "l", "l", "p", "t", "t", "t", "tl", "tl", "tz", "tz", "z", "x"
1444                },
1445                new String[]{"otl", "eotl", "ili", "itl", "atl", "atli", "oca", "itli", "oatl", "al", "ico", "acual",
1446                        "ote", "ope", "oli", "ili", "acan", "ato", "atotl", "ache", "oc", "aloc", "ax", "itziz", "iz"
1447                },
1448                new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{3, 4, 5, 4, 3, 1}, 0.3, 0.2, 0.0, 0.3, genericSanityChecks, true)
1449                .addModifiers(new Modifier("c([ie])", "qu$1"),
1450                        new Modifier("z([ie])", "c$1"));
1451    }
1452
1453    /**
1454     * Imitation text from an approximation of the language spoken by the Aztec people and also over a million
1455     * contemporary people in parts of Mexico. This is may be hard to pronounce, since it uses "tl" as a normal
1456     * consonant (it can start or end words), but is mostly a fairly recognizable style of language.
1457     * <br>
1458     * Olcoletl latl palitz ach; xatatli tzotloca amtitl, xatloatzoatl tealitozaztitli otamtax?
1459     */
1460    public static final FakeLanguageGen NAHUATL = nahuatl().register("Nahuatl");
1461
1462    private static FakeLanguageGen mongolian(){
1463        return new FakeLanguageGen(
1464                new String[]{"a", "a", "a", "a", "a", "a", "a", "aa", "aa", "e", "i", "i", "i", "i", "i", "i", "i", "i", "ii",
1465                        "o", "o", "o", "o", "oo", "u", "u", "u", "u", "u", "u", "u", "u", "uu", "uu", "ai", "ai"},
1466                new String[]{},
1467                new String[]{"g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "j", "j", "j", "j",
1468                        "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "y", "y"},
1469                new String[]{},
1470                new String[]{"g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "gh", "j", "j", "j",
1471                        "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "g", "n",
1472                        "g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "gh", "j", "j", "j", "n",
1473                        "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "y", "y",
1474                        "ng", "ng", "ng", "ngh", "ngh", "lj", "gch", "sd", "rl", "bl", "sd", "st", "md", "mg", "gd", "gd",
1475                        "sv", "rg", "rg", "mr", "tn", "tg", "ds", "dh", "dm", "gts", "rh", "lb", "gr", "gy", "rgh"},
1476                new String[]{"ei", "ei", "ei", "uulj", "iig", "is", "is", "an", "aan", "iis", "alai", "ai", "aj", "ali"
1477                },
1478                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 9, 3, 1}, 0.3, 0.2, 0.0, 0.07, null, true);
1479    }
1480
1481    /**
1482     * Imitation text from an approximation of one of the languages spoken in the 13th-century Mongol Empire. Can be
1483     * hard to pronounce. This is closest to Middle Mongolian, and is probably not the best way to approximate modern
1484     * Mongolian, which was written for many years in the Cyrillic alphabet (same alphabet as Russian) and has changed a
1485     * lot in other ways.
1486     * <br>
1487     * Ghamgarg zilijuub lirgh arghar zunghichuh naboogh.
1488     */
1489    public static final FakeLanguageGen MONGOLIAN = mongolian().register("Mongolian");
1490
1491    /**
1492     * A mix of four different languages, using only ASCII characters, that is meant for generating single words for
1493     * creature or place names in fantasy settings.
1494     * <br>
1495     * Adeni, Sainane, Caneros, Sune, Alade, Tidifi, Muni, Gito, Lixoi, Bovi...
1496     */
1497    public static final FakeLanguageGen FANTASY_NAME = GREEK_ROMANIZED.mix(
1498            RUSSIAN_ROMANIZED.mix(
1499                    FRENCH.removeAccents().mix(
1500                            JAPANESE_ROMANIZED, 0.5), 0.85), 0.925).register("Fantasy");
1501    /**
1502     * A mix of four different languages with some accented characters added onto an ASCII base, that can be good for
1503     * generating single words for creature or place names in fantasy settings that should have a "fancy" feeling from
1504     * having unnecessary accents added primarily for visual reasons.
1505     * <br>
1506     * Askieno, Blarcīnũn, Mēmida, Zizhounkô, Blęrinaf, Zemĭ, Mónazôr, Renerstă, Uskus, Toufounôr...
1507     */
1508    public static final FakeLanguageGen FANCY_FANTASY_NAME = FANTASY_NAME.addAccents(0.47, 0.07).register("Fancy Fantasy");
1509
1510    private static FakeLanguageGen goblin(){
1511        return new FakeLanguageGen(
1512                new String[]{"a", "a", "a", "a",
1513                        "e", "e",
1514                        "i", "i", "i",
1515                        "o", "o", "o", "o",
1516                        "u", "u", "u", "u", "u", "u", "u",
1517                },
1518                new String[]{},
1519                new String[]{"b", "g", "d", "m", "h", "n", "r", "v", "sh", "p", "w", "y", "f", "br", "dr", "gr", "pr", "fr",
1520                        "br", "dr", "gr", "pr", "fr", "bl", "dw", "gl", "gw", "pl", "fl", "hr",
1521                        "b", "g", "d", "m", "h", "n", "r", "b", "g", "d", "m", "h", "n", "r",
1522                        "b", "g", "d", "m", "r", "b", "g", "d", "r",
1523                },
1524                new String[]{
1525                        "br", "gr", "dr", "pr", "fr", "rb", "rd", "rg", "rp", "rf",
1526                        "br", "gr", "dr", "rb", "rd", "rg",
1527                        "mb", "mg", "md", "mp", "mf", "bm", "gm", "dm", "pm", "fm",
1528                        "mb", "mg", "md", "bm", "gm", "dm",
1529                        "bl", "gl", "dw", "pl", "fl", "lb", "ld", "lg", "lp", "lf",
1530                        "bl", "gl", "dw", "lb", "ld", "lg",
1531                        "nb", "ng", "nd", "np", "nf", "bn", "gn", "dn", "pn", "fn",
1532                        "nb", "ng", "nd", "bn", "gn", "dn",
1533                        "my", "gy", "by", "py", "mw", "gw", "bw", "pw",
1534                        "bg", "gb", "bd", "db", "bf", "fb",
1535                        "gd", "dg", "gp", "pg", "gf", "fg",
1536                        "dp", "pd", "df", "fd",
1537                        "pf", "fp",
1538                        "bg", "gb", "bd", "db", "gd", "dg",
1539                        "bg", "gb", "bd", "db", "gd", "dg",
1540                        "bg", "gb", "bd", "db", "gd", "dg",
1541                        "bg", "gb", "bd", "db", "gd", "dg",
1542                        "bg", "gb", "bd", "db", "gd", "dg",
1543                },
1544                new String[]{
1545                        "b", "g", "d", "m", "n", "r", "sh", "p", "f",
1546                        "b", "g", "d", "m", "n", "r", "b", "g", "d", "m", "n", "r", "sh",
1547                        "b", "g", "d", "m", "r", "b", "g", "d", "r",
1548                        "rb", "rd", "rg", "rp", "rf", "lb", "ld", "lg", "lp", "lf",
1549                },
1550                new String[]{},
1551                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{3, 7, 5, 1}, 0.1, 0.15, 0.0, 0.0, genericSanityChecks, true);
1552    }
1553    /**
1554     * Fantasy language that might be suitable for stealthy humanoids, such as goblins, or as a secret language used
1555     * by humans who want to avoid notice. Uses no "hard" sounds like "t" and "k", but also tries to avoid the flowing
1556     * aesthetic of fantasy languages associated with elves. Tends toward clusters of consonants like "bl", "gm", "dg",
1557     * and "rd".
1558     * <br>
1559     * Gwabdip dwupdagorg moglab yurufrub.
1560     */
1561    public static final FakeLanguageGen GOBLIN = goblin().register("Goblin");
1562
1563    private static FakeLanguageGen elf(){
1564        return new FakeLanguageGen(
1565                new String[]{"a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
1566                        "a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
1567                        "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
1568                        "ai", "ai", "ai", "ea", "ea", "ea", "ia", "ae"
1569                },
1570                new String[]{
1571                        "ai", "ai", "ae", "ea", "ia", "ie",
1572                        "â", "â", "ai", "âi", "aî", "aï", "î", "î", "ï", "ï", "îe", "iê", "ïe", "iê",
1573                        "e", "ë", "ë", "ëa", "ê", "êa", "eâ", "ei", "eî", "o", "ô",
1574                        "a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
1575                        "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o",
1576                        "ai", "ai", "ai", "ai", "ai", "ei", "ei", "ei", "ea", "ea", "ea", "ea",
1577                        "ie", "ie", "ie", "ie", "ie", "ia", "ia", "ia", "ia"
1578                },
1579                new String[]{"l", "r", "n", "m", "th", "v", "s", "sh", "z", "f", "p", "h", "y", "c",
1580                        "l", "r", "n", "m", "th", "v", "f", "y",
1581                        "l", "r", "n", "m", "th", "v", "f",
1582                        "l", "r", "n", "th", "l", "r", "n", "th",
1583                        "l", "r", "n", "l", "r", "n", "l", "r", "n",
1584                        "pl", "fy", "ly", "cl", "fr", "pr", "qu",
1585                },
1586                new String[]{"rm", "ln", "lv", "lth", "ml", "mv", "nv", "vr", "rv", "ny", "mn", "nm", "ns", "nth"},
1587                new String[]{
1588                        "l", "r", "n", "m", "th", "s",
1589                        "l", "r", "n", "th", "l", "r", "n", "th",
1590                        "l", "r", "n", "l", "r", "n", "l", "r", "n",
1591                        "r", "n", "r", "n", "r", "n", "n", "n", "n", "n"
1592                },
1593                new String[]{},
1594                new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 6, 6, 3, 1}, 0.4, 0.3, 0.0, 0.0, genericSanityChecks, true);
1595    }
1596
1597    /**
1598     * Fantasy language that tries to imitate the various languages spoken by elves in J.R.R. Tolkien's works, using
1599     * accented vowels occasionally and aiming for long, flowing, vowel-heavy words. It's called ELF because there isn't
1600     * a consistent usage across fantasy and mythological sources of either "elvish", "elfish", "elven", "elfin", or any
1601     * one adjective for "relating to an elf." In the GDX display module, the "smooth" and "unicode" fonts, among
1602     * others, support all the accented characters you need for this.
1603     * <br>
1604     * Il ilthiê arel enya; meâlelail theasor arôreisa.
1605     */
1606    public static final FakeLanguageGen ELF = elf().register("Elf");
1607
1608    private static FakeLanguageGen demonic(){
1609        return new FakeLanguageGen(
1610                new String[]{"a", "a", "a", "a",
1611                        "e",
1612                        "i", "i",
1613                        "o", "o", "o", "o", "o",
1614                        "u", "u", "u", "u", "u",
1615                },
1616                new String[]{},
1617                new String[]{
1618                        "b", "bh", "d", "dh", "t", "tl", "ts", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", "v", "y",
1619                        "br", "bhr", "dr", "dhr", "tr", "tsr", "kr", "khr", "gr", "ghr", "fr", "shr", "vr",
1620                        "bl", "bhl", "tsl", "kl", "chl", "khl", "gl", "ghl", "fl", "sl", "zl", "vl",
1621                        "dz", "chf", "sf", "shf", "zv", "st", "sk",
1622                        "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr",
1623                        "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr",
1624                        "x", "x", "x", "gh", "gh", "ghr",
1625                        "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr",
1626                        "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr",
1627                        "x", "x", "x", "gh", "gh", "ghr",
1628                        "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr",
1629                        "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr",
1630                        "x", "x", "x", "gh", "gh", "ghr",
1631                },
1632                new String[]{},
1633                new String[]{
1634                        "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r",
1635                        "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r",
1636                        "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r",
1637                        "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r",
1638                        "rb", "rbs", "rbh", "rd", "rds", "rdh", "rt", "rts", "rk", "rks", "rch", "rkh", "rg", "rsh", "rv", "rz",
1639                        "lt", "lts", "lk", "lch", "lkh", "lg", "ls", "lz", "lx",
1640                        "bs", "ds", "ts", "lts", "ks", "khs", "gs", "fs", "rs", "rx",
1641                        "bs", "ds", "ts", "lts", "ks", "khs", "gs", "fs", "rs", "rx",
1642                        "rbs", "rds", "rts", "rks", "rkhs", "rgs", "rfs", "rs", "rx",
1643                        "lbs", "lds", "lts", "lks", "lkhs", "lgs", "lfs",
1644                        "rdz", "rvz", "gz", "rgz", "vd", "kt",
1645                        "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk",
1646                        "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh",
1647                        "x", "x", "x", "gh", "gh", "gh", "rgh",
1648                        "ts", "ts", "ks", "ks", "khs",
1649                        "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk",
1650                        "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh",
1651                        "x", "x", "x", "gh", "gh", "gh", "rgh",
1652                        "ts", "ts", "ks", "ks", "khs",
1653                        "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk",
1654                        "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh",
1655                        "x", "x", "x", "gh", "gh", "gh", "rgh",
1656                        "ts", "ts", "ks", "ks", "khs",
1657                },
1658                new String[]{},
1659                new String[]{"'"}, new int[]{1, 2, 3}, new double[]{6, 7, 3}, 0.05, 0.08, 0.11, 0.0, null, true);
1660    }
1661    /**
1662     * Fantasy language that might be suitable for a language spoken by demons, aggressive warriors, or people who seek
1663     * to emulate or worship similar groups. The tendency here is for DEMONIC to be the language used by creatures that
1664     * are considered evil because of their violence, while INFERNAL would be the language used by creatures that are
1665     * considered evil because of their manipulation and deceit (DEMONIC being "chaotic evil" and INFERNAL being "lawful
1666     * evil"). This uses lots of sounds that don't show up in natural languages very often, mixing harsh or guttural
1667     * sounds like "kh" and "ghr" with rare sounds like "vr", "zv", and "tl". It uses vowel-splitting in a way that is
1668     * similar to LOVECRAFT, sometimes producing sounds like "tsa'urz" or "khu'olk".
1669     * <br>
1670     * Vrirvoks xatughat ogz; olds xu'oz xorgogh!
1671     */
1672    public static final FakeLanguageGen DEMONIC = demonic().register("Demonic");
1673
1674    private static FakeLanguageGen infernal(){
1675        return new FakeLanguageGen(
1676                new String[]{
1677                        "a", "a", "a", "à", "á", "â", "ä",
1678                        "e", "e", "e", "e", "e", "e", "e", "e", "è", "é", "ê", "ë",
1679                        "i", "i", "i", "i", "ì", "í", "î", "ï",
1680                        "o", "o", "ò", "ó", "ô", "ö",
1681                        "u", "u", "ù", "ú", "û", "ü",
1682                },
1683                new String[]{"æ", "ai", "aî", "i", "i", "î", "ï", "ia", "iâ", "ie", "iê", "eu", "eû", "u", "u", "û", "ü"},
1684                new String[]{"b", "br", "d", "dr", "h", "m", "z", "k", "l", "ph", "t", "n", "y", "th", "s", "sh",
1685                        "m", "m", "m", "z", "z", "l", "l", "l", "k", "k", "b", "d", "h", "h", "y", "th", "th", "s", "sh",
1686                },
1687                new String[]{
1688                        "mm", "mm", "mm", "lb", "dd", "dd", "dd", "ddr", "bb", "bb", "bb", "bbr", "lz", "sm", "zr",
1689                        "thsh", "lkh", "shm", "mh", "mh",
1690                },
1691                new String[]{
1692                        "b", "d", "h", "m", "n", "z", "k", "l", "ph", "t", "th", "s", "sh", "kh",
1693                        "h", "m", "n", "z", "l", "ph", "t", "th", "s",
1694                        "h", "h", "h", "m", "m", "n", "n", "n", "n", "n", "l", "l", "l", "l", "l", "t", "t", "t",
1695                        "th", "th", "s", "s", "z", "z", "z", "z",
1696                },
1697                new String[]{"ael", "im", "on", "oth", "et", "eus", "iel", "an", "is", "ub", "ez", "ath", "esh", "ekh", "uth", "ut"},
1698                new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{3, 5, 9, 4}, 0.75, 0.35, 0.17, 0.07, genericSanityChecks, true);
1699    }
1700    /**
1701     * Fantasy language that might be suitable for a language spoken by fiends, users of witchcraft, or people who seek
1702     * to emulate or worship similar groups. The tendency here is for DEMONIC to be the language used by creatures that
1703     * are considered evil because of their violence, while INFERNAL is the language used by creatures that are
1704     * considered evil because of their manipulation and deceit (DEMONIC being "chaotic evil" and INFERNAL being "lawful
1705     * evil"). The name INFERNAL refers to Dante's Inferno and the various naming conventions used for residents of Hell
1706     * in the more-modern Christian traditions (as well as some of the stylistic conventions of Old Testament figures
1707     * described as false idols, such as Moloch and Mammon). In an effort to make this distinct from the general style
1708     * of names used in ancient Hebrew (since this is specifically meant for the names of villains as opposed to normal
1709     * humans), we add in vowel splits as used in LOVECRAFT and DEMONIC, then add quite a few accented vowels. These
1710     * traits make the language especially well-suited for "deal with the Devil" written bargains, where a single accent
1711     * placed incorrectly could change the meaning of a contract and provide a way for a fiend to gain leverage.
1712     * <br>
1713     * Zézîzûth eke'iez áhìphon; úhiah îbbëphéh haîtemheû esmez...
1714     */
1715    public static final FakeLanguageGen INFERNAL = infernal().register("Infernal");
1716
1717    private static FakeLanguageGen simplish(){
1718        return new FakeLanguageGen(
1719                new String[]{
1720                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
1721                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
1722                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
1723                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
1724                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
1725                        "ai", "ai", "ea", "io", "oi", "ia", "io", "eo"
1726                },
1727                new String[]{"u", "u", "oa"},
1728                new String[]{
1729                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n",
1730                        "p", "pl", "pr", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "v", "w", "y", "z",
1731                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n",
1732                        "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y",
1733                        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n",
1734                        "p", "r", "s", "sh", "t", "th",
1735                        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n",
1736                        "p", "r", "s", "sh", "t", "th",
1737                        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n",
1738                        "p", "r", "s", "sh", "t", "th",
1739                        "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n",
1740                        "p", "r", "s", "sh", "t", "th",
1741                        "b", "d", "f", "g", "h", "l", "m", "n",
1742                        "p", "r", "s", "sh", "t", "th",
1743                        "b", "d", "f", "g", "h", "l", "m", "n",
1744                        "p", "r", "s", "sh", "t", "th",
1745                        "r", "s", "t", "l", "n",
1746                },
1747                new String[]{"ch", "j", "w", "y", "v", "w", "y", "w", "y", "ch",
1748                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1749                },
1750                new String[]{"bs", "lt", "mb", "ng", "ng", "nt", "ns", "ps", "mp", "rt", "rg", "sk", "rs", "ts", "lk", "ct",
1751                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", "th", "z",
1752                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1753                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1754                        "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1755                        "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1756                        "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1757                        "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t",
1758                },
1759                new String[]{},
1760                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{7, 18, 6, 1}, 0.26, 0.12, 0.0, 0.0, genericSanityChecks, true);
1761    }
1762    /**
1763     * English-like language that omits complex spelling and doesn't include any of the uncommon word endings of English
1764     * like "ought" or "ation." A good choice when you want something that doesn't use any non-US-keyboard letters,
1765     * looks somewhat similar to English, and tries to be pronounceable without too much effort. This doesn't have any
1766     * doubled or silent letters, nor does it require special rules for pronouncing vowels like "road" vs. "rod", though
1767     * someone could make up any rules they want.
1768     * <br>
1769     * Fledan pranam, simig bag chaimer, drefar, woshash is sasik.
1770     */
1771    public static final FakeLanguageGen SIMPLISH = simplish().register("Simplish");
1772
1773
1774    private static FakeLanguageGen alien_a(){
1775        return new FakeLanguageGen(
1776                new String[]{"a", "a", "a", "a", "a", "a", "a", "ai", "ai", "ao", "ao", "ae", "ae", "e", "e", "e", "e",
1777                        "ea", "eo", "i", "i", "i", "i", "i", "i", "ia", "ie", "io", "o", "o", "o", "oa"},
1778                new String[]{},
1779                new String[]{"c", "f", "h", "j", "l", "m", "n", "p", "q", "r", "s", "v", "w", "x", "y", "z",
1780                        "c", "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z",
1781                        "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z",
1782                        "hc", "hf", "hj", "hl", "hm", "hn", "hq", "hr", "hv", "hw", "hy", "hz",
1783                        "cr", "fr", "jr", "mr", "nr", "pr", "qr", "sr", "vr", "xr", "yr", "zr",
1784                        "cy", "fy", "jy", "my", "ny", "py", "qy", "ry", "sy", "vy", "xy", "zy",
1785                        "cl", "fl", "jl", "ml", "nl", "pl", "ql", "sl", "vl", "xl", "yl", "zl",
1786                },
1787                new String[]{
1788                        "cr", "fr", "jr", "mr", "nr", "pr", "qr", "sr", "vr", "xr", "yr", "zr",
1789                        "cy", "fy", "jy", "my", "ny", "py", "qy", "ry", "sy", "vy", "xy", "zy",
1790                        "cl", "fl", "jl", "ml", "nl", "pl", "ql", "sl", "vl", "xl", "yl", "zl",
1791                                    "jc", "lc", "mc", "nc", "qc", "rc", "sc",       "wc", "yc", "zc",
1792                        "cf",       "jf", "lf",       "nf", "qf", "rf", "sf", "vf", "wf", "yf", "zf",
1793                        "cj", "fj",       "lj", "mj", "nj", "qj", "rj", "sj",       "wj", "yj", "zj",
1794                        "cm", "fm", "jm", "lm",       "nm", "qm", "rm", "sm", "vm", "wm", "ym", "zm",
1795                        "cn", "fn", "jn", "ln", "mn",       "qn", "rn", "sn", "vn", "wn", "yn", "zn",
1796                        "cp", "fp", "jp", "lp", "mp", "np", "qp", "rp", "sp", "vp", "wp", "yp", "zp",
1797                        "cq",       "jq", "lq", "mq", "nq",       "rq", "sq",       "wq", "yq", "zq",
1798                        "cs", "fs", "js", "ls", "ms", "ns", "qs",             "vs", "ws", "ys", "zs",
1799                        "cv", "fv", "jv", "lv", "mv", "nv", "qv", "rv", "sv",       "wv", "yv", "zv",
1800                        "cw", "fw", "jw", "lw", "mw", "nw", "qw", "rw", "sw", "vw",       "yw", "zw",
1801                        "cx",       "jx", "lx", "mx", "nx", "qx", "rx",       "vx", "wx", "yx", "zx",
1802                        "cz", "fz",       "lz", "mz", "nz", "qz", "rz", "sz", "vz", "wz", "yz",
1803                },
1804                new String[]{
1805                        "c", "f", "h", "j", "l", "m", "n", "p", "q", "r", "s", "v", "w", "x", "y", "z",
1806                        "c", "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z",
1807                        "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z",
1808                        "hc", "hf", "hj", "hl", "hm", "hn", "hq", "hr", "hv", "hw", "hy", "hz",
1809                },
1810                new String[]{},
1811                new String[]{}, new int[]{1, 2, 3}, new double[]{1, 1, 1}, 0.65, 0.6, 0.0, 0.0, null, true);
1812    }
1813
1814    /**
1815     * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for
1816     * an alien species. This alien language emphasizes unusual consonant groups and prefers the vowels 'a' and 'i',
1817     * sometimes with two different vowels in one syllable, like with 'ea', but never two of the same vowel, like 'ee'.
1818     * Many consonant groups may border on unpronounceable unless a different sound is meant by some letters, such as
1819     * 'c', 'h', 'q', 'x', 'w', and 'y'. In particular, 'x' and 'q' may need to sound like different breathy, guttural,
1820     * or click noises for this to be pronounced by humans effectively.
1821     * <br>
1822     * Jlerno iypeyae; miojqaexli qraisojlea epefsaihj xlae...
1823     */
1824    public static final FakeLanguageGen ALIEN_A = alien_a().register("Alien A");
1825
1826    private static FakeLanguageGen korean()
1827    {
1828        return new FakeLanguageGen(
1829                new String[]{
1830                        "a", "ae", "ya", "yae", "eo", "e", "yeo", "ye", "o", "wa", "wae",
1831                        "oe", "yo", "u", "wo", "we", "wi", "yu", "eu", "i",  "ui",
1832                        "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "u", "u", "u", "u",
1833                        "ae", "ya", "eo", "eo", "eu", "eu", "wa", "wae", "wo", "oe", "oe",
1834                        "yo", "yo", "yu", "yu", "eu",
1835                },
1836                new String[]{},
1837                new String[]{
1838                        "g", "n", "d", "r", "m", "b", "s", "j", "ch", "k", "t", "p", "h",
1839                        "g", "n", "d", "b", "p", "k", "j", "ch", "h",
1840                        "g", "n", "d", "b", "p", "k", "j", "h",
1841                        "g", "n", "p", "k", "j",
1842                        "g", "p", "k",
1843                        "g", "p", "k",
1844                },
1845                new String[]{
1846                        "g", "kg", "ngn", "kd", "ngn", "ngm", "kb", "ks", "kj", "kch", "k-k", "kt", "kp", "k",
1847                        "n", "n-g", "nn", "nd", "nn", "nm", "nb", "ns", "nj", "nch", "nk", "nt", "np", "nh",
1848                        "d", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "t",
1849                        "r", "lg", "nn", "ld", "ll", "lm", "lb", "ls", "lj", "lch", "lk", "lt", "lp", "lh",
1850                        "m", "mg", "mn", "md", "mn", "mm", "mb", "ms", "mj", "mch", "mk", "mt", "mp", "mh",
1851                        "b", "pg", "mn", "pd", "mn", "mm", "pb", "ps", "pj", "pch", "pk", "pt", "p-p", "p",
1852                        "s", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "t",
1853                        "ng-", "ngg", "ngn", "ngd", "ngn", "ngm", "ngb", "ngs", "ngj", "ngch", "ngk", "ngt", "ngp", "ngh",
1854                        "j", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "ch",
1855                        "t", "t", "t", "j", "j", "j", "g", "g", "g", "g", "n", "n", "n", "n", "n", "ng", "ng", "ng",
1856                        "d", "d", "d", "b", "b",
1857                        "tt", "nn", "kk", "kk", "ks",
1858                        "h", "k", "nn", "t", "nn", "nm", "p", "hs", "ch", "tch", "tk", "tt", "tp", "t",
1859                        "kk", "pp", "ss", "tt", "jj", "ks", "nch", "nh", "r",
1860                        "r", "r", "r", "r", "r", "r", "r", "r", "r", "r", "r", "r",
1861                        "ngg", "ngn", "ngm", "ngj", "ngch", "ngk", "ngp",
1862                        "mg", "mch", "mk", "md", "mb", "mp",
1863                        "nj", "nch", "nd", "nk", "nb", "nj", "nch", "nd", "nk",
1864                        "kg", "kj", "kch"
1865                },
1866                new String[]{
1867                        "k", "n", "t", "l", "m", "p", "k", "ng", "h", "n", "n",
1868                        "k", "n", "t", "l", "m", "p", "k", "ng", "h", "t",
1869                },
1870                new String[]{"ul", "eul", "eol", "ol",  "il", "yeol", "yol", "uk", "euk", "eok", "aek", "ok", "ak",
1871                        "on", "ong", "eong", "yang", "yong", "yeong", "ung", "wong", "om", "am", "im", "yuh", "uh", "euh",
1872                        "ap", "yaep", "eop", "wep", "yeop"
1873                },
1874                new String[]{"-"},
1875                new int[]{1, 2, 3, 4}, new double[]{14, 9, 3, 1}, 0.14, 0.24, 0.02, 0.09,
1876                null, true);
1877    }
    /**
     * Imitation text from an approximation of Korean, using the Revised Romanization method that is official in South
     * Korea today and is easier to type. The text this makes may be hard to pronounce. Korean is interesting as a
     * language to imitate for a number of reasons; many of the sounds in it are rarely found elsewhere, it can cluster
     * consonants rather tightly (most languages don't; English does to a similar degree but Japanese hardly has any
     * groups of consonants), and there are many more vowel sounds without using tones (here, two or three letters are
     * used for a vowel, where the first can be y or w and the rest can be a, e, i, o, or u in some combination). Some
     * letter combinations possible here are impossible or very rare in correctly-Romanized actual Korean, such as the
     * rare occurrence of a single 'l' before a vowel (it normally only appears in Romanized text before a consonant or
     * at the end of a word).
     * <br>
     * Built by the private factory {@code korean()} and then passed through {@code register("Korean Romanized")}.
     * <br>
     * Hyeop euryam, sonyon muk tyeok aengyankeon, koelgwaelmwak.
     */
    public static final FakeLanguageGen KOREAN_ROMANIZED = korean().register("Korean Romanized");
1892
1893    private static FakeLanguageGen alien_e(){
1894        return new FakeLanguageGen(
1895                new String[]{"a", "a", "a", "a", "a", "a", "aa", "aa",
1896                        "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "ee", "ee", "ee", "ee",
1897                        "i", "i", "i", "i", "i", "ii",
1898                        "o", "o", "o", "o",
1899                        "u", "u", "u"
1900                },
1901                new String[]{},
1902                new String[]{"t", "k", "c", "g", "z", "s", "d", "r", "ts",
1903                        "tr", "kr", "cr", "gr", "zr", "st", "sk", "dr",
1904                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1905                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1906                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1907                        "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts",
1908                        "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts",
1909                        "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts",
1910                        "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", "t", "k", "ts",
1911                        "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", "t", "k", "ts",
1912                        "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", "t", "k",
1913                        "tr", "kr", "st", "sk", "tq", "kq", "sq"
1914                },
1915                new String[]{
1916                        "tt", "kk", "cc", "gg", "zz", "dd", "s", "r", "ts",
1917                        "tr", "kr", "cr", "gr", "zr", "st", "sk", "dr",
1918                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1919                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1920                        "tq", "kq", "cq", "gq", "zq", "sq", "dq",
1921                        "tk", "kt", "tc", "ct", "gt", "tg", "zt", "tz", "td", "dt", "rt", "rtr", "tst",
1922                        "kc", "ck", "gk", "kg", "zk", "kz", "kd", "dk", "rk", "rkr", "tsk", "kts",
1923                        "gc", "cg", "zc", "cz", "cd", "dc", "rc", "rcr", "tsc", "cts",
1924                        "zg", "gz", "gd", "dg", "rg", "rgr", "tsg", "gts",
1925                        "zd", "dz", "rz", "rzr", "tsz", "zts",
1926                        "rd", "rdr", "tsd", "dts",
1927                        "tt", "tt", "tt", "tt", "tt", "tt",
1928                        "tt", "tt", "tt", "tt", "tt", "tt",
1929                        "kk", "kk", "kk", "kk", "kk", "kk",
1930                        "kk", "kk", "kk", "kk", "kk", "kk",
1931                        "kt", "tk", "kt", "tk", "kt", "tk", "kt", "tk",
1932                },
1933                new String[]{
1934                        "t", "k", "c", "g", "z", "s", "d", "r", "ts",
1935                        "t", "k", "t", "k", "t", "k", "ts",
1936                        "t", "k", "c", "g", "z", "s", "d", "r", "ts",
1937                        "t", "k", "t", "k", "t", "k", "ts",
1938                        "st", "sk", "sc", "sg", "sz", "ds",
1939                        "rt", "rk", "rc", "rg", "rz", "rd", "rts"
1940                },
1941                new String[]{},
1942                new String[]{}, new int[]{1, 2, 3}, new double[]{5, 4, 2}, 0.45, 0.0, 0.0, 0.0, null, true);
1943    }
1944
    /**
     * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for
     * an alien species. This alien language emphasizes hard sounds and prefers the vowels 'e' and 'a', sometimes with
     * two of the same vowel, like 'ee', but never with two different vowels in one syllable, like with 'ea'.
     * This language is meant to use click sounds, if pronunciation is given, where 'q' modifies a consonant to form a
     * click, such as 'tq'. This is like how 'h' modifies letters in English to make 'th' different from 't' or 'h'.
     * This may be ideal for a species with a beak (or one that lacks lips for some other reason), since it avoids using
     * sounds that require lips (some clicks might be approximated by other species using their lips if this uses some
     * alien-specific clicking organ).
     * <br>
     * Built by the private factory {@code alien_e()} and then passed through {@code register("Alien E")}.
     * <br>
     * Reds zasg izqekkek zagtsarg ukaard ac ots as!
     */
    public static final FakeLanguageGen ALIEN_E = alien_e().register("Alien E");
1958
1959    private static FakeLanguageGen alien_i(){
1960        return new FakeLanguageGen(
1961                new String[]{
1962                        "a", "a", "a", "a", "a", "a", "à", "á", "â", "ā", "ä",
1963                        "e", "e", "e", "e", "e", "e", "è", "é", "ê", "ē", "ë",
1964                        "i", "i", "i", "i", "i", "i", "i", "i", "ì", "í", "î", "ï", "ī",
1965                        "i", "i", "i", "i", "i", "i", "i", "i", "ì", "í", "î", "ï", "ī",
1966                        "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "ò", "ó", "ô", "ō", "ö",
1967                        "u", "u", "u", "u", "u", "u", "ù", "ú", "û", "ū", "ü",
1968                },
1969                new String[]{},
1970                new String[]{
1971                        "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l",
1972                        "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l",
1973                        "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l",
1974                        "r", "r", "r", "r", "r", "l", "l", "l", "l", "l",
1975                        "gr", "gl", "zr", "zl", "sl", "shr", "thr", "mr", "nr", "pr", "pl", "br", "bl", "vr", "vl", "hr",
1976                        "zv", "sp", "zg"
1977                },
1978                new String[]{
1979                        "j", "h",
1980                },
1981                new String[]{
1982                        "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "v", "r", "l",
1983                        "th", "zh", "sh", "th", "zh", "sh", "lth", "lzh", "lsh", "rth", "rzh", "rsh",
1984                },
1985                new String[]{},
1986                new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{6, 9, 5, 1}, 0.6, 0.4, 0.075, 0.0, null, true);
1987    }
1988
    /**
     * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for
     * an alien species. This alien language emphasizes "liquid" sounds such as 'l' and 'r', along with clusters that
     * mix those with other consonants, and prefers the vowels 'i' and 'o', never with two of the same vowel, like
     * 'ee', nor with two different vowels in one syllable, like with 'ea'; it uses accent marks heavily and could be
     * a tonal language. It sometimes splits vowels with a single apostrophe, and rarely has large consonant clusters.
     * <br>
     * Built by the private factory {@code alien_i()} and then passed through {@code register("Alien I")}.
     * <br>
     * Asherzhäl zlómór ìsiv ázá nralthóshos, zlôbùsh.
     */
    public static final FakeLanguageGen ALIEN_I = alien_i().register("Alien I");
1999
2000    private static FakeLanguageGen alien_o(){
2001        return new FakeLanguageGen(
2002                new String[]{
2003                        "a", "e", "i", "o", "o", "o", "o", "u",
2004                        "aa", "ea", "ia", "oa", "oa", "oa", "ua", "ae", "ai", "ao", "ao", "ao", "au",
2005                        "ee", "ie", "oe", "oe", "oe", "ue", "ei", "eo", "eo", "eo", "eu",
2006                        "ii", "oi", "oi", "oi", "ui", "io", "io", "io", "iu",
2007                        "oo", "ou", "uo", "oo", "ou", "uo", "oo", "ou", "uo", "uu",
2008                        "aa", "ea", "ia", "oa", "oa", "oa", "ua", "ae", "ai", "ao", "ao", "ao", "au",
2009                        "ee", "ie", "oe", "oe", "oe", "ue", "ei", "eo", "eo", "eo", "eu",
2010                        "ii", "oi", "ui", "io", "io", "io", "iu",
2011                        "oo", "ou", "uo", "oo", "ou", "uo", "oo", "ou", "uo", "uu",
2012                        "aea", "aia", "aoa", "aoa", "aoa", "aua", "eae", "eie", "eoe", "eoe", "eoe", "eue",
2013                        "iai", "iei", "ioi", "ioi", "ioi", "iui", "uau", "ueu", "uiu", "uou",
2014                        "oao", "oeo", "oio", "ouo", "oao", "oeo", "oio", "ouo", "oao", "oeo", "oio", "ouo",
2015                        "aei", "aeo", "aeo", "aeo", "aeu", "aie", "aio", "aio", "aio", "aiu",
2016                        "aoe", "aoi", "aou", "aoe", "aoi", "aou", "aoe", "aoi", "aou", "aue", "aui", "auo", "auo", "auo",
2017                        "eai", "eao", "eao", "eao", "eau", "eia", "eio", "eio", "eio", "eiu",
2018                        "eoa", "eoi", "eou", "eoa", "eoi", "eou", "eoa", "eoi", "eou", "eua", "eui", "euo", "euo", "euo",
2019                        "iae", "iao", "iao", "iao", "iau", "iea", "ieo", "ieo", "ieo", "ieu",
2020                        "ioa", "ioe", "iou", "ioa", "ioe", "iou", "ioa", "ioe", "iou", "iua", "iue", "iuo", "iuo", "iuo",
2021                        "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", "oui",
2022                        "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", "oui",
2023                        "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", "oui",
2024                        "uae", "uai", "uao", "uao", "uao", "uea", "uei", "ueo", "ueo", "ueo", "uia", "uie",
2025                        "uio", "uoa", "uoe", "uoi", "uio", "uoa", "uoe", "uoi", "uio", "uoa", "uoe", "uoi",
2026                },
2027                new String[]{},
2028                new String[]{
2029                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2030                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2031                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2032                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2033                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2034                        "hm", "hn", "hr", "hw", "hv", "hl", "hy",
2035                        "fm", "fn", "fr", "fw", "fv", "fl", "fy",
2036                        "mr", "vr", "ry"
2037                },
2038                new String[]{
2039                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2040                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2041                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2042                        "m", "n", "r", "w", "h", "v", "f", "l", "y",
2043                        "mm", "nn", "rr", "ww", "hh", "vv", "ff", "ll", "yy",
2044                        "mm", "nn", "rr", "ww", "hh", "vv", "ff", "ll", "yy",
2045                        "hm", "hn", "hr", "hw", "hv", "hl", "hy",
2046                        "fm", "fn", "fr", "fw", "fv", "fl", "fy",
2047                        "mr", "vr", "ry"
2048                },
2049                new String[]{
2050                        "m", "n", "r", "h", "v", "f", "l",
2051                        "m", "n", "r", "h", "v", "f", "l",
2052                        "m", "n", "r", "h", "v", "f", "l",
2053                        "rm", "rn", "rv", "rf", "rl",
2054                        "lm", "ln", "lv", "lf"
2055                },
2056                new String[]{},
2057                new String[]{}, new int[]{1, 2, 3}, new double[]{3, 6, 4}, 0.0, 0.55, 0.0, 0.0, null, true);
2058    }
2059
    /**
     * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for
     * an alien species. This alien language emphasizes large clusters of vowels, typically with 2 or 3 vowel sounds
     * between consonants, though some vowel groups could be interpreted in multiple ways (such as English "maim" and
     * "bail", which also have regional differences in pronunciation). As the name would suggest, it strongly prefers
     * using the vowel "o", with it present in about half the groups, but doesn't have any preference toward or against
     * the other vowels it uses, "a", "e", "i", and "u". The consonants completely avoid hard sounds like "t" and "k",
     * medium-hard sounds like "g" and "b", and also sibilants like "s" and "z". This should be fairly hard to
     * pronounce, but possible.
     * <br>
     * Built by the private factory {@code alien_o()} and then passed through {@code register("Alien O")}.
     * <br>
     * Foiuhoeorfeaorm novruol naionouffeu meuif; hmoieloreo naemriou.
     */
    public static final FakeLanguageGen ALIEN_O = alien_o().register("Alien O");
2073
2074    // àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳ
2075    // çðþñýćĉċčďđĝğġģĥħĵķĺļľŀłńņňŋŕŗřśŝşšţťŵŷÿźżžșțẁẃẅ
2076    private static FakeLanguageGen alien_u(){
2077        return new FakeLanguageGen(
2078                new String[]{
2079                        "a", "a", "a", "a", "ä", "i", "o", "o", "o", "ö", "u", "u", "u", "u", "u", "u", "ü", "ü"
2080                },
2081                new String[]{},
2082                new String[]{
2083                        "b", "b", "b", "b", "d", "d", "g", "g", "ġ", "h", "h", "h", "h", "ħ",
2084                        "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p",
2085                        "q", "q", "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v",
2086                        "w", "w", "w", "w", "ẃ", "y", "y", "y", "y", "ý"
2087                },
2088                new String[]{
2089                        "b", "b", "b", "b", "d", "d", "g", "g", "ġ", "h", "h", "h", "h", "ħ",
2090                        "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p",
2091                        "q", "q", "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v",
2092                        "w", "w", "w", "w", "ẃ", "y", "y", "y", "y", "ý"
2093                },
2094                new String[]{
2095                        "b", "b", "b", "b", "d", "d", "g", "g", "ġ",
2096                        "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p",
2097                        "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v",
2098                },
2099                new String[]{"emb", "embrid", "embraŋ", "eŋ", "eŋul", "eŋov", "eẃul", "eẃuld", "eẃulb",
2100                        "eviś", "evim", "ełurn", "ełav", "egiġ", "ergiġ", "elgiġ", "eŕu", "eŕup", "eŕulm", "eŕuv",
2101                        "eħul", "eħid", "eħiŋ", "eyü", "eyür", "eyürl", "eyüld", "eyüns", "eqä", "eqäp", "eqäġ",
2102                        "esu", "esumb", "esulg", "esurl", "eśo", "eśold", "eśolg", "eśu", "eśur", "eśuŋ",
2103                        "eñu", "eñuns", "eñurn", "eño", "eñolb", "eñols"
2104                },
2105                new String[]{"'"}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 4, 7, 5, 2}, 0.4, 0.15, 0.06, 0.5, null, true);
2106    }
2107
    /**
     * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for
     * an alien species. This alien language is meant to have an abrupt change mid-word for many words, with the suffix
     * of roughly half of words using the letter "e", which is absent from the rest of the language; these suffixes can
     * also use consonant clusters, which are similarly absent elsewhere. The suffixes would make sense as a historical
     * relic or as a linguistic holdout from a historical merger. As the name would suggest, it strongly prefers
     * using the vowel "u", with it present in about half the groups, and can put an umlaut on some vowels, as in "ü".
     * The consonants completely avoid hard sounds like "t" and "k", and don't cluster; they often have special marks.
     * This should be relatively easy to pronounce for an alien language, though the words are rather long.
     * <br>
     * Built by the private factory {@code alien_u()} and then passed through {@code register("Alien U")}.
     * <br>
     * Üweħid vuŕeħid deẃul leŋul waloyeyür; äyovavü...
     */
    public static final FakeLanguageGen ALIEN_U = alien_u().register("Alien U");
2121
2122    private static FakeLanguageGen dragon(){
2123        return new FakeLanguageGen(
2124                new String[]{
2125                        "a", "a", "a", "e", "e", "i", "i", "o", "o", "u",
2126                        "a", "a", "a", "e", "e", "i", "i", "o", "o", "u",
2127                        "a", "a", "a", "e", "e", "i", "i", "o", "o", "u",
2128                        "a", "a", "a", "e", "e", "i", "i", "o", "o", "u",
2129                        "a", "a", "a", "a", "a", "a", "e", "i", "o",
2130                        "ai", "ai", "aa", "ae", "au", "ea", "ea", "ea",
2131                        "ia", "ia", "ie", "io", "io", "oa", "ou"
2132                },
2133                new String[]{
2134                        "aa", "aa", "aa", "ai", "ae", "ae", "ae", "au", "au",
2135                        "ea", "ea", "eo", "eo",
2136                        "ii", "ii", "ia", "ia", "ia", "ia", "ie", "ie", "ie", "io", "io", "io",
2137                        "oa", "ou", "ou", "ou", "ou"
2138                },
2139                new String[]{
2140                        "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z",
2141                        "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z",
2142                        "d", "f", "g", "h", "k", "l", "m", "n", "r", "t", "th", "v", "z",
2143                        "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z",
2144                        "d", "f", "g", "h", "l", "k", "l", "n", "r", "t", "th", "v", "z",
2145                        "d", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z",
2146                        "d", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z",
2147                        "d", "g", "k", "l", "r", "t",
2148                        "d", "g", "k", "l", "r", "t",
2149                        "d", "g", "k", "l", "r", "t",
2150                        "k", "k", "t", "t", "v",
2151                        "k", "k", "t", "t", "th",
2152                        "k", "k", "t", "t", "ch",
2153                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2154                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2155                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2156                        "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr",
2157                        "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr",
2158                },
2159                new String[]{
2160                        "rch", "rd", "rg", "rk", "rm", "rn", "rp", "rt", "rth", "rv", "rw", "rz",
2161                        "rch", "rd", "rg", "rk", "rm", "rn", "rp", "rt", "rth", "rv", "rw", "rz",
2162                        "rdr", "rgr", "rkr", "rtr", "rthr",
2163                        "lk", "lt", "lv", "lz",
2164                        "ng", "nk", "ng", "nk", "ng", "nk", "ng", "nk", "nt", "nth", "nt", "nth", "nt", "nth", "nd",
2165                        "ngr", "nkr", "ntr", "nthr",
2166                        "dh", "gh", "lh", "mh", "nh", "rh",
2167                        "dch", "dg", "dk", "dth", "dv", "dz",
2168                        "kch", "kg", "kd", "kth", "kv", "kz",
2169                        "gch", "gd", "gk", "gth", "gv", "gz",
2170                        "tch", "tg", "tk", "ty", "tv", "tz",
2171                        "zm", "zn", "zk", "zv", "zt", "zg", "zd",
2172
2173                        "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z",
2174                        "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z",
2175                        "d", "f", "g", "h", "k", "l", "m", "n", "r", "t", "th", "v", "z",
2176                        "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z",
2177                        "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v",
2178                        "d", "g", "k", "l", "n", "r", "t", "th", "v",
2179                        "d", "g", "k", "l", "n", "r", "t", "th", "v",
2180                        "d", "g", "k", "l", "r", "t",
2181                        "d", "g", "k", "l", "r", "t",
2182                        "d", "g", "k", "l", "r", "t",
2183                        "k", "k", "t", "t", "r",
2184                        "k", "k", "t", "t", "r",
2185                        "k", "k", "t", "t", "r",
2186                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2187                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2188                        "dr", "fr", "gr", "hr", "kr", "tr", "thr",
2189                        "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr",
2190                        "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr",
2191
2192                },
2193                new String[]{
2194                        "z", "z", "z", "t", "t", "t", "n", "r", "k", "th"
2195                },
2196                new String[]{"iamat", "at", "ut", "ok", "iok", "ioz", "ez", "ion", "ioth", "aaz", "iel"},
2197                new String[]{}, new int[]{2, 3, 4, 5}, new double[]{2, 7, 10, 3}, 0.14, 0.04, 0.0, 0.11, genericSanityChecks, true);
2198    }
2199
    /**
     * Fantasy language that tries to sound like the speech of a powerful and pompous dragon, using long, complex words
     * and a mix of hard consonants like "t" and "k", "liquid" consonants like "l" and "r", and sometimes vowel groups
     * like "ie" and "aa". It frequently uses consonant clusters involving "r". It uses no accented characters.
     * <br>
     * Built by the private factory {@code dragon()} and then passed through {@code register("Dragon")}. The
     * {@link #KOBOLD} constant reads several arrays from this instance.
     * <br>
     * Vokegodzaaz kigrofreth ariatarkioth etrokagik deantoznik hragriemitaaz gianehaadaz...
     */
    public static final FakeLanguageGen DRAGON = dragon().register("Dragon");
2208
    /**
     * Fantasy language based closely on {@link #DRAGON}, but with much shorter words normally and closing syllables
     * that may sound "rushed" or "crude", though it has the same general frequency of most consonants and vowels.
     * This means it still uses lots of "t", "k", and "r", can group two vowels sometimes, and when there's a consonant
     * in the middle of a word, it is often accompanied by an "r" on one or both sides. If used with
     * {@link NaturalLanguageCipher}, this will look very similar to DRAGON, because the syllable lengths aren't
     * determined by this object but by the text being ciphered. Still, the ends of words are often different. It is
     * called KOBOLD because, even though the original kobold myth was that of a goblin-like spirit that haunted cobalt
     * mines, the modern RPG treatment of kobolds frequently describes them as worshippers of dragons or in some way
     * created by dragons, but generally they're a sort of failure to live up to a dragon's high expectations. The feel
     * of this language is meant to be something like a dragon's speech, but much less "fancy" and rather curt.
     * <br>
     * Unlike the neighboring constants, this one has no private factory method: it calls the constructor directly,
     * passing DRAGON's vowel, consonant and vowel-splitter arrays as they are (whether the constructor copies them is
     * not visible here) and supplying its own closing syllables, syllable lengths {1, 2, 3}, and frequencies. Because
     * it reads fields of DRAGON in a static initializer, it must stay declared after DRAGON.
     * <br>
     * Thritriz, laazak gruz kokak thon lut...
     */
    public static final FakeLanguageGen KOBOLD = new FakeLanguageGen(
            // vowel and consonant pools are taken unchanged from DRAGON
            DRAGON.openingVowels, DRAGON.midVowels, DRAGON.openingConsonants, DRAGON.midConsonants, DRAGON.closingConsonants,
            // KOBOLD's own word endings; the first five entries are listed three times each
            new String[]{"ik", "ak", "ek", "at", "it", "ik", "ak", "ek", "at", "it", "ik", "ak", "ek", "at", "it", "et", "ut", "ark", "irk", "erk"},
            DRAGON.vowelSplitters, new int[]{1, 2, 3}, new double[]{5, 11, 1},
            0.1, 0.0, 0.0, 0.22, genericSanityChecks, true).register("Kobold");
2228
2229    private static FakeLanguageGen insect(){
2230        return new FakeLanguageGen(
2231                new String[]{
2232                        "a", "a", "a", "a", "a", "a",
2233                        "e", "e", "e", "e",
2234                        "i", "i", "i", "i", "i", "i", "i",
2235                        "o", "o", "o",
2236                        "u", "u",
2237                },
2238                new String[]{},
2239                new String[]{"t", "k", "g", "sh", "s", "x", "r", "ts",
2240                        "tr", "kr", "gr", "shr", "st", "sk",
2241                        "tr", "kr", "st", "sk", "tr", "kr", "st", "sk",
2242                        "t", "k", "g", "sh", "s", "x", "r", "ts",
2243                        "t", "k", "r", "ts", "ts",
2244                        "t", "k", "r", "tr", "kr", "t", "k", "r", "tr", "kr", "t", "k", "r", "tr", "kr",
2245                        "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", "t", "k",
2246                },
2247                new String[]{
2248                        "rr","rr","rr","rr","rr","rr","rr","rr","rr","rr",
2249                        "rt", "rk", "rg", "rsh", "rs", "rx", "rts",
2250                        "xt", "xk", "xg", "xr",
2251                        "sts", "skr", "str", "sks"
2252                },
2253                new String[]{
2254                        "t", "k", "g", "sh", "s", "x", "r", "ts", "t", "k", "g", "sh", "s", "x", "r", "ts",
2255                        "rt", "rk", "rg", "rsh", "rs", "rx", "rts",
2256                        "t", "t", "t", "t", "t", "t", "k", "k", "k", "k", "k", "k", "x", "x", "rr", "rr", "rr"
2257                },
2258                new String[]{},
2259                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 4, 2, 1}, 0.3, 0.1, 0.0, 0.0, null, true);
2260    }
2261
    /**
     * Fantasy/sci-fi language that would typically be fitting for an insect-like species without a close equivalent to
     * human lips. This language emphasizes hard sounds such as 't' and 'k', uses some sibilants such as 's', 'sh', and
     * 'x', uses lots of 'r' sounds, includes trill sounds using 'rr' (as in Spanish), and uses primarily 'a' and 'i'
     * for vowels, with low complexity on vowels. Differs from {@link #ALIEN_E} by not having harder-to-explain click
     * sounds, and adjusting vowels/sibilants a fair bit.
     * <br>
     * Built by the private factory {@code insect()} and then passed through {@code register("Insect")}.
     * <br>
     * Ritars tsarraxgits, krit trir istsak!
     */
    public static final FakeLanguageGen INSECT = insect().register("Insect");
2272
2273    private static FakeLanguageGen maori(){
2274        return new FakeLanguageGen(
2275                new String[]{"a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2276                        "o", "o", "o", "o", "o", "u", "u",
2277                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2278                        "o", "o", "o", "o", "o", "u", "u",
2279                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2280                        "o", "o", "o", "o", "o", "u", "u",
2281                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2282                        "o", "o", "o", "o", "o", "u", "u",
2283                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2284                        "o", "o", "o", "o", "o", "u", "u",
2285                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2286                        "o", "o", "o", "o", "o", "u", "u",
2287                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2288                        "o", "o", "o", "o", "o", "u", "u",
2289                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2290                        "o", "o", "o", "o", "o", "u", "u",
2291                        "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i",
2292                        "o", "o", "o", "o", "o", "u", "u",
2293                        "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au",
2294                        "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au",
2295                        "āe", "āi", "āi", "āi", "āo", "āo", "āo", "āo", "āu", "oi", "oe", "ou",
2296                        "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au",
2297                        "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au",
2298                        "āe", "āi", "āi", "āi", "āo", "āo", "āo", "āo", "āu", "oi", "oe", "ou",
2299                        "āa", "āoi", "āoe", "āou",
2300                        "āa", "āoi", "āoe", "āou",
2301                        "ea", "ei", "ei", "ei", "eo", "eo", "eo", "eu", "eae", "eai", "eao", "eā", "eāe", "eāi", "eāo", "eoi", "eoe", "eou",
2302                        "ia", "ia", "ie", "io", "io", "iu", "iae", "iai", "iao", "iau", "iā", "iāe", "iāi", "iāo", "iāu", "ioi", "ioe", "iou",
2303                        "oa", "oa", "oa", "oa", "oae", "oai", "oao", "oau", "oā", "oā", "oāe", "oāi", "oāo", "oāu",
2304                        "oa", "oa", "oa", "oa", "oae", "oai", "oao", "oau", "oā", "oā", "oāe", "oāi", "oāo", "oāu",
2305                        "ua", "ue", "ui", "uo", "uae", "uai", "uao", "uau", "uā", "uāe", "uāi", "uāo", "uāu", "uoi", "uoe", "uou",
2306                        "aea", "aea", "aei", "aei", "aei", "aeo", "aeo", "aeo", "aeu",
2307                        "aia", "aia", "aia", "aia", "aie", "aio", "aio", "aiu",
2308                        "aoa", "aoa",
2309                        "aua", "aua", "aue", "aue", "aue", "aui", "aui", "auo",
2310                        "āea", "āea", "āei", "āei", "āei", "āeo", "āeo", "āeo", "āeu",
2311                        "āia", "āia", "āia", "āia", "āie", "āio", "āio", "āiu",
2312                        "āoa", "āoa",
2313                        "āua", "āua", "āue", "āue", "āue", "āui", "āui", "āuo",
2314                },
2315                new String[]{},
2316                new String[]{"h", "h", "k", "k", "m", "m", "m", "m", "n", "n", "p", "p",
2317                        "r", "r", "r", "r", "r", "t", "t", "t", "t", "w", "w", "ng", "wh", "wh", "wh",
2318                        "h", "k", "m", "m", "m", "m", "n", "n", "p", "p",
2319                        "r", "r", "r", "r", "r", "t", "t", "t", "t", "w", "w", "wh", "wh", "wh"
2320                },
2321                new String[]{"h", "k", "k", "k", "m", "n", "n", "n", "p", "p", "p", "p", "p",
2322                        "r", "r", "r", "t", "t", "t", "w", "ng", "ng", "ng", "ng", "wh", "wh"
2323                },
2324                new String[]{""},
2325                new String[]{},
2326                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 5, 4, 2}, 0.2, 1.0, 0.0, 0.0, genericSanityChecks, true);
2327    }
2328
2329    /**
2330     * Imitation text from an approximation of the Maori language, spoken in New Zealand both today and historically,
2331     * and closely related to some other Polynesian languages. This version uses the current standard orthographic
2332     * standard of representing a long "a" with the letter "ā" (adding a macron diacritic).
2333     * <br>
2334     * Māuka whapi enāongupe worute, moa noepo?
2335     */
2336    public static final FakeLanguageGen MAORI = maori().register("Maori");
2337
2338    private static FakeLanguageGen spanish(){
2339        return new FakeLanguageGen(
2340                new String[]{"a", "a", "a", "a", "a", "i", "i", "i", "o", "o", "o", "e", "e", "e", "e", "e", "u", "u"},
2341                new String[]{"a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e",
2342                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2343                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2344                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2345                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2346                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2347                        "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e",
2348                        "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua",
2349                        "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua",
2350                        "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua",
2351                        "ái", "aí", "éo", "ía", "iá", "íe", "ié", "ío", "íu", "oí", "uí", "ué", "uá",
2352                        "á", "é", "í", "ó", "ú", "á", "é", "í", "ó",},
2353                new String[]{"b", "c", "ch", "d", "f", "g", "gu", "h", "j", "l", "m", "n", "p", "qu", "r", "s", "t", "v", "z",
2354                        "b", "s", "z", "r", "n", "h", "j", "j", "s", "c", "r",
2355                        "b", "s", "z", "r", "n", "h", "j", "s", "c", "r",
2356                        "b", "s", "r", "n", "h", "j", "s", "c", "r",
2357                        "n", "s", "l", "c", "n", "s", "l", "c",
2358                        "br", "gr", "fr"
2359                },
2360                new String[]{"ñ", "rr", "ll", "ñ", "rr", "ll", "mb", "nd", "ng", "nqu", "rqu", "zqu", "zc", "rd", "rb", "rt", "rt", "rc", "sm", "sd"},
2361                new String[]{"r", "n", "s", "s", "r", "n", "s", "s", "r", "n", "s", "s", "r", "n", "s", "s", 
2362                        "r", "n", "s", "r", "n", "s", "r", "n", "s", "r", "n", "s",
2363                },
2364                new String[]{"on", "ez", "es", "es", "es", "es", "es",
2365                        "ador", "edor", "ando", "endo", "indo",
2366                        "ar", "as", "amos", "an", "oy", "ay",
2367                        "er", "es", "emos", "en", "e",
2368                        "ir", "es", "imos", "en", "io",
2369                        "o", "a", "o", "a", "o", "a", "o", "a", "os", "as", "os", "as", "os", "as"
2370                },
2371                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{4, 5, 3, 1}, 0.1, 1.0, 0.0, 0.3, genericSanityChecks, true)
2372                .addModifiers(
2373                        new Modifier("([aeouáéóú])i$", "$1y"),
2374                        new Modifier("([qQ])ua", "$1ue"), // guapo, agua, guano, all real Spanish, we should allow gua
2375                        new Modifier("([qQ])uá", "$1ué"),
2376                        new Modifier("([qgQG])u[ouy]", "$1ui"),
2377                        new Modifier("([qgQG])u[óú]", "$1uí"));
2378    }
2379
2380    /**
2381     * Imitation text from an approximation of Spanish (not using the variations spoken in Spain, but closer to Latin
2382     * American forms of Spanish). This isn't as close as possible, but it abides by most of the orthographic rules that
2383     * Spanish uses. It uses the acute accent on the vowels á, é, í, ó, and ú, as well as the consonant ñ.
2384     * <br>
2385     * Jamos daí oñuezqui, luarbezquisdas canga ombiurta irri hoño resda!
2386     */
2387    public static final FakeLanguageGen SPANISH = spanish().register("Spanish");
2388    
2389    private static FakeLanguageGen deepSpeech(){
2390        return new FakeLanguageGen(
2391                new String[]{
2392                        "a", "a", "o", "o", "o", "o", "u", "u", "u", "u",
2393                        "a", "a", "o", "o", "o", "o", "u", "u", "u", "u",
2394                        "a", "a", "o", "o", "o", "o", "u", "u", "u", "u",
2395                        "a", "a", "o", "o", "o", "o", "u", "u", "u", "u",
2396                        "a", "a", "o", "o", "o", "o", "u", "u", "u", "u",
2397                        "aa", "aa", "oo", "oo", "oo", "oo", "uu", "uu", "uu", "uu",
2398                        "aa", "aa", "oo", "oo", "oo", "oo", "uu", "uu", "uu", "uu",
2399                        "ah", "ah", "oh", "oh", "oh", "oh", "uh", "uh", "uh", "uh",
2400                        "aah", "ooh", "ooh", "uuh", "uuh",
2401                },
2402                new String[]{},
2403                new String[]{
2404                        "m", "ng", "r", "x", "y", "z", "v", "l",
2405                        "m", "ng", "r", "x", "y", "z", "v", "l",
2406                        "m", "ng", "r", "x", "y", "z", "v", "l",
2407                        "m", "ng", "r", "x", "y", "z", "v", "l",
2408                        "m", "ng", "r", "x", "y", "z", "v", "l",
2409                        "m", "ng", "r", "z", "l",
2410                        "m", "ng", "r", "z", "l",
2411                        "m", "ng", "r", "z", "l",
2412                        "m", "ng", "r", "z", "l",
2413                        "mr", "vr", "ry", "zr",
2414                        "mw", "vw", "ly", "zw",
2415                        "zl", "vl"
2416                },
2417                new String[]{
2418                },
2419                new String[]{
2420                        "m", "ng", "r", "x", "z", "v", "l",
2421                        "m", "ng", "r", "x", "z", "v", "l",
2422                        "m", "ng", "r", "x", "z", "v", "l",
2423                        "m", "ng", "r", "x", "z", "v", "l",
2424                        "rm", "rng", "rx", "rz", "rv", "rl",
2425                        "lm", "lx", "lz", "lv",
2426                },
2427                new String[]{},
2428                new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{3, 6, 5, 1}, 0.18, 0.25, 0.07, 0.0, null, true);
2429    }
2430
2431    /**
2432     * Fantasy/sci-fi language that would potentially be fitting for a trade language spoken by various very-different
2433     * groups, such as creatures with tentacled faces who need to communicate with spider-elves and living crystals.
2434     * This language tries to use relatively few sounds so vocally-restricted species can speak it or approximate it,
2435     * but some of its sounds are uncommon. It uses "ng" as Vietnamese does, as a sound that can be approximated with
2436     * "w" but more accurately is like the sound at the end of "gong". It uses a breathy sound in many vowels,
2437     * represented by "h", and this is separate from (and can be combined with) lengthening the vowel by doubling it
2438     * ("a", "ah", "aa", and "aah" are different). The "x" sound can be approximated by any of the "kh" or "q" sounds
2439     * used in various human languages, or with its usage in English for "ks". This does separate some vowels with "'",
2440     * which can be a glottal stop as in Hawaiian or various other languages, or approximated with a brief pause.
2441     * <br>
2442     * Zrolmolurz, voluu, nguu yuh'ongohng!
2443     */
2444    public static final FakeLanguageGen DEEP_SPEECH = deepSpeech().register("Deep Speech");
2445    /**
2446     * Somewhat close to Old Norse, which is itself very close to Icelandic, but changed to avoid letters not on a
2447     * US-ASCII keyboard. Not to be confused with the language(s) of Norway, where the Norwegian languages are called
2448     * norsk, and are further distinguished into Bokmål and Nynorsk. This just applies {@link Modifier#SIMPLIFY_NORSE}
2449     * to {@link #NORSE}. This replaces eth ('Ðð') and thorn ('Þþ') with 'th' unless preceded by 's' (where 'sð' or 'sþ'
2450     * becomes "st") or followed by 'r' (where 'ðr' or 'þr' becomes 'fr'). It replaces 'Æ' or 'æ' with 'Ae' or 'ae', and
2451     * replaces 'Ö' or 'ö' with 'Ou' or "ou", which can change the length of a String relative to NORSE. It removes all
2452     * other accent marks (since the two-dot umlaut accent has already been changed, this only affects acute accents).
2453     * It also changes some of the usage of "j" where it means the English "y" sound, making "fjord" into "fyord", which
2454     * is closer to familiar uses from East Asia like "Tokyo" and "Pyongyang".
2455     * <br>
2456     * Leyrk tyour stomri kna sno aed frepdapa, prygso?
2457     */
2458    public static final FakeLanguageGen NORSE_SIMPLIFIED = norse().addModifiers(Modifier.SIMPLIFY_NORSE)
2459            .register("Norse Simplified");
2460    
2461    private static FakeLanguageGen hletkip(){
2462        return new FakeLanguageGen(
2463                new String[]{"a", "a", "a", "e", "e", "e", "e", "e", "i", "i", "i", "i",
2464                        "o", "o", "u", "u", "u", "u",},
2465                new String[]{},
2466                new String[]{
2467                              "hf", "hl", "hm", "hn",                      "hr", "hs", "hv", "hw",  "hy", "hz",
2468                          "br", "kr", "fr", "mr", "nr", "pr", "khr", "shr", "zhr", "sr",       "vr", "thr", "zv", "zr",
2469                          "by", "ky", "fy", "my", "ny", "py", "khy", "shy", "zhy", "ry", "sy", "vy", "thy", "zy",
2470                          "bl", "kl", "fl", "ml", "nl", "pl", "khl", "shl", "zhl",       "sl", "vl", "thl", "lw", "zl",
2471                          "bf", "kf",       "mf", "nf", "pf",        "fsh", "shf", "fr", "sf", "fl", "fr",  "fw", "fz",
2472                          "bs", "ks", "fs", "ms", "ns", "ps", "skh", "shs", "khs",            "shv","shw",
2473                          "pkh", "psh", "pth", "pw", "tkh", "tsh", "tth", "tw", "sht", "bkh", "bsh", "bth", "bw",
2474                          "dkh", "dth", "dw", "dzh", "khg", "shg", "thg", "gw", "zhg", "khk", "thk", "kw",
2475                },
2476                new String[]{
2477                        "hf", "hl", "hm", "hn",                    "hr", "hs", "hv", "hw",  "hy", "hz",
2478                        "br", "kr", "fr", "mr", "nr", "pr", "khr", "shr", "zhr", "sr",       "vr", "thr", "zv", "zr",
2479                        "by", "ky", "fy", "my", "ny", "py", "khy", "shy", "zhy", "ry", "sy", "vy", "thy", "zy",
2480                        "bl", "kl", "fl", "ml", "nl", "pl", "khl", "shl", "zhl",       "sl", "vl", "thl", "lw", "zl",
2481                        "bf", "kf",       "mf", "nf", "pf",        "fsh", "shf", "fr", "sf", "fl", "fr",  "fw", "fz",
2482                        "bs", "ks", "fs", "ms", "ns", "ps", "skh", "shs", "khs",            "shv","shw",
2483                        "pkh", "psh", "pth", "pw", "tkh", "tsh", "tth", "tw", "bkh", "bsh", "bth", "bw",
2484                        "dkh", "dsh", "dth", "dw", "khg", "shg", "thg", "gw", "khk", "thk", "kw",
2485                        "rb", "rk", "rf", "rm", "rn", "rp", "rkh", "rsh", "rzh", "rh", "rv", "rw", "rz", "rl",
2486                        "lb", "lk", "lf", "lm", "ln", "lp", "lkh", "lsh", "lzh", "lh", "lv", "lw", "lz", "lr",
2487                        "sb", "sk", "sf", "sm", "sn", "sp", "skh", "gsh", "dzh", "sh", "sv", "sw", "sz", "ts", "st",
2488                        "mb", "md", "mk", "mf", "tm", "nm", "mp", "mkh", "msh", "mzh", "mh", "mv", "mw", "mt", "mz",
2489                        "nb", "nd", "nk", "nf", "tn", "mn", "np", "nkh", "nsh", "nzh", "nh", "nv", "nw", "nt", "nz",
2490                        "zb", "zd", "zk", "zf", "zt", "nz", "zp", "zkh", "zhz", "dz",  "hz", "zv", "zw", "tz",
2491                },
2492                new String[]{
2493                },
2494                new String[]{"ip", "ik", "id", "iz", "ir", "ikh", "ish", "is", "ith", "iv", "in", "im", "ib", "if",
2495                        "ep", "ek", "ed", "ez", "er", "ekh", "esh", "es", "eth", "ev", "en", "em", "eb", "ef",
2496                        "up", "ud", "uz", "ur", "ush", "us", "uth", "uv", "un", "um", "ub", "uf", 
2497                },
2498                new String[]{}, new int[]{1, 2, 3}, new double[]{1, 1, 1}, 0.0, 0.4, 0.0, 1.0, null, true);
2499    }
2500
2501    /**
2502     * A fictional language that could ostensibly be spoken by some group of humans, but that isn't closely based on any
2503     * one real-world language. It is meant to have a mix of hard and flowing sounds, roughly like Hebrew or Turkish,
2504     * but with a very different set of consonants and consonant blends. Importantly, consonant sounds are always paired
2505     * here except for the final consonant of a word, which is always one consonant sound if it is used at all. The
2506     * choices of consonant sounds are designed to be unusual, like "hl", "pkh", and "zhg" (which can all start a word).
2507     * <br>
2508     * Nyep khruv kwolbik psesh klulzhanbik psahzahwuth bluryup; hnish zhrim?
2509     */
2510    public static final FakeLanguageGen HLETKIP = hletkip().register("Hletkip");
2511    
2512    private static FakeLanguageGen ancientEgyptian(){
2513        return new FakeLanguageGen(
2514                new String[]{"a", "a", "a", "a", "a", "aa", "e", "e", "e", "e", "e", "e", "e", "i", "i", "i",
2515                        "u", "u", "u",},
2516                new String[]{},
2517                new String[]{
2518                        "b",
2519                        "p", "p", "p",
2520                        "f", "f", "f", "f", "f",
2521                        "m", "m", "m", "m", "m", "m",
2522                        "n", "n", "n", "n", "n",
2523                        "r", "r", "r", "r", "r", "r",
2524                        "h", "h", "h", "h", "h", "h", "h", "h",
2525                        "kh", "kh", "kh", "kh", "kh", "kh",
2526                        "z",
2527                        "s", "s", "s", "s", "s", "s", "s", "s",
2528                        "sh", "sh", "sh", "sh",
2529                        "k", "k", "k", "k", "k",
2530                        "g", "g", "g", 
2531                        "t", "t", "t", "t", "t", "t",
2532                        "th", "th", "th",
2533                        "d", "d", "d",
2534                        "dj",
2535                        "w", "w", "w",
2536                        "pt"
2537                },
2538                new String[]{
2539                        "b",
2540                        "p", "p", "p", "pw", "pkh", "ps", "ps", "pt",
2541                        "f", "f", "f", "f", "f", "ft",
2542                        "m", "m", "m", "m", "m", "m", "mk", "nm",
2543                        "n", "n", "n", "n", "n", "nkh", "nkh", "nk", "nt", "ns",
2544                        "r", "r", "r", "r", "r", "r", "rs", "rt",
2545                        "h", "h", "h", "h", "h", "h", "h", "h",
2546                        "kh", "kh", "kh", "kh", "kh", "kh", "khm", "khm", "khw",
2547                        "z",
2548                        "s", "s", "s", "s", "s", "s", "s", "s", "st", "sk", "skh",
2549                        "sh", "sh", "sh", "sh", "shw",
2550                        "k", "k", "k", "k", "k", "kw",
2551                        "g", "g", "g",
2552                        "t", "t", "t", "t", "t", "t", "ts",
2553                        "th", "th", "th",
2554                        "d", "d", "d", "ds",
2555                        "dj",
2556                        "w", "w", "w",
2557                },
2558                new String[]{
2559                        "m", "n", "t", "s", "p", "sh", "m", "n", "t", "s", "p", "sh", "m", "n", "t", "s", "p", "sh",
2560                        "kh", "f"
2561                },
2562                new String[]{"amon", "amun", "ut", "epsut", "is", "is", "ipsis", "akhti", "eftu", "atsut", "amses"
2563                },
2564                new String[]{"-"}, new int[]{1, 2, 3, 4}, new double[]{4, 7, 3, 2}, 0.5, 0.4, 0.06, 0.09, null, true);
2565    }
2566
2567    /**
2568     * A (necessarily) very rough anglicization of Old Egyptian, a language that has no precisely known pronunciation
2569     * rules and was written with hieroglyphics. This is meant to serve as an analogue for any ancient language with few
2570     * contemporary speakers.
2571     * <br>
2572     * Thenamses upekha efe emesh nabasu ahakhepsut!
2573     */
2574    // for future reference, consult https://en.wiktionary.org/wiki/Module:egy-pron-Egyptological
2575    public static final FakeLanguageGen ANCIENT_EGYPTIAN = ancientEgyptian().register("Ancient Egyptian");
2576
2577    private static FakeLanguageGen crow(){
2578        return new FakeLanguageGen(
2579                new String[]{"a", "a", "a", "a", "a","a", "a", "a","a", "a", "a", "á", "á", "aa", "aa", "áá", "áa",
2580                        "e", "e", "e", "e", "e", "e", "ee", "ée", "é", "éé",
2581                        "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "ii", "íí", "íi", "í",
2582                        "o", "o", "o", "o", "o", "o", "o", "oo", "óó", "óo", "ó",
2583                        "u", "u","u", "u","u", "u","u", "u", "u", "u", "uu", "úú", "úu", "ú",
2584                        "ia", "ua", "ia", "ua", "ia", "ua", "ia", "ua", "ía", "úa"
2585                },
2586                new String[]{
2587                },
2588                new String[]{
2589                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2590                        "k", "k", "m", "k", "k", "m", "d", "s"},
2591                new String[]{
2592                        "bb", "pp", "ss", "kk", "ll", "mm", "nn", "dd", "tt",
2593                        "kk", "kk", "mm", "kk", "kk", "mm", "dd", "ss",
2594                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2595                        "k", "k", "m", "k", "k", "m", "d", "s",
2596                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2597                        "k", "k", "m", "k", "k", "m", "d", "s",
2598                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2599                        "k", "k", "m", "k", "k", "m", "d", "s",
2600                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2601                        "k", "k", "m", "k", "k", "m", "d", "s",
2602                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2603                        "k", "k", "m", "k", "k", "m", "d", "s",
2604                        "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2605                        "k", "k", "m", "k", "k", "m", "d", "s"
2606                },
2607                new String[]{"b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh",
2608                        "k", "k", "m", "k", "k", "m", "d", "s"
2609                },
2610                new String[]{
2611                },
2612                new String[]{"-"}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 7, 6, 4, 2}, 0.4, 1.0, 0.12, 0.0, null, true);
2613    }
2614    /**
2615     * A rough imitation of the Crow language of the American Midwest, using some tone marks. Some of the orthography
2616     * rules aren't clear across Internet information about the language, so this really is a "fake" language it will be
2617     * generating, not the real thing at all. This considers 'x' to be the rough back-of-throat noise that isn't in
2618     * English other than in loanwords, like the Scottish "loch," and in names like the German "Bach." Doubled (to use
2619     * the linguistic term, geminated) consonants are pronounced for a longer time, and doubled vowels with the same
2620     * accent mark or no accent mark are also lengthened. An un-accented vowel has a normal tone, an accented vowel has
2621     * a high tone, and an accented vowel followed by an un-accented vowel has a falling tone. This last feature is the
2622     * least common among languages here, and is a good way of distinguishing imitation Crow from other languages.
2623     * <br>
2624     * Pashu-umíkiki; chinébúlu ak kóokutú shu-eníí-a ipíimúu heekokáakoku?
2625     */
2626    public static final FakeLanguageGen CROW = crow().register("Crow");
2627
2628    private static FakeLanguageGen imp(){
2629        return new FakeLanguageGen(
2630                new String[]{"a", "a", "a", "a", "a", "á", "á", "á", "aa", "aa", "aa", "aaa", "aaa", "aaa", "áá", "áá", "ááá", "ááá",
2631                        "e", "e", "e", "e", "e", "e",
2632                        "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "í", "í", "í", "í",
2633                        "ii", "ii", "ii", "iii", "iii", "iii", "íí", "íí", "ííí", "ííí",
2634                        "u", "u", "u", "u", "u", "u", "u", "u", "ú", "ú", "ú", "uu", "uu", "uu", "úú", "úú", "úúú", "úúú",
2635                        "ia", "ia", "ia", "ui", "ui"
2636                },
2637                new String[]{
2638                },
2639                new String[]{
2640                        "s", "k", "d", "t", "h", "f", "g", "r", "r", "r", "r", "gh", "ch",
2641                        "sk", "st", "skr", "str", "kr", "dr", "tr", "fr", "gr"
2642                },
2643                new String[]{
2644                        "s", "k", "d", "t", "h", "f", "g", "r", "r", "r", "r", "gh", "ch",
2645                        "sk", "st", "skr", "str", "kr", "dr", "tr", "fr", "gr"
2646                },
2647                new String[]{
2648                        "s", "k", "d", "t", "g", "gh", "ch"
2649                },
2650                new String[]{
2651                },
2652                new String[]{"-"}, new int[]{1, 2, 3}, new double[]{7, 11, 4}, 0.2, 0.5, 0.4, 0.0, null, true);
2653    }
2654    /**
2655     * A fantasy language meant for obnoxious screeching annoying enemies more-so than for intelligent friends or foes.
2656     * Uses accented vowels to mean "louder or higher-pitched" and up to three repeats of any vowel to lengthen it.
2657     * <br>
2658     * Siii-aghak fítríííg dú-úgh ru-úúk, grííírá!
2659     */
2660    public static final FakeLanguageGen IMP = imp().register("Imp");
2661    
2662    private static FakeLanguageGen malay(){
2663        return new FakeLanguageGen(
2664                new String[]{
2665                        "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "ai", "ai", "au",
2666                        "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e",
2667                        "i", "i", "i", "i", "i", "i", "i", "i", "ia", "ia",
2668                        "o", "o", "o", "o", "o", "o", "ou", 
2669                        "u", "u", "u", "u", "u", "u", "u", "u", "u", "ua", "ua",},
2670                new String[]{},
2671                new String[]{
2672                        "b", "b", "b", "b",
2673                        "ch",
2674                        "d", "d", "d", "d",
2675                        "f",
2676                        "g", "g",
2677                        "h", "h",
2678                        "j", "j", "j", "j",
2679                        "k", "k", "k", "k", "k", "k",
2680                        "kh",
2681                        "l", "l", "l", "l", "l", "l", "l",
2682                        "m", "m", "m", "m",
2683                        "n", "n", "n",
2684                        "p", "p", "p", "p", "p",
2685                        "r", "r",
2686                        "s", "s", "s", "s", "s",
2687                        "sh", "sh",
2688                        "t", "t", "t", "t",
2689                        "w",
2690                        "y",
2691                        "z",
2692                },
2693                new String[]{
2694                        "b", "b", "b", "b",
2695                        "ch",
2696                        "d", "d", "d", "d",
2697                        "f",
2698                        "g", "g",
2699                        "h", "h", "h", "h", "h",
2700                        "j", "j", "j",
2701                        "k", "k", "k", "k", "k", "k", "k", "k", "k",
2702                        "kn",
2703                        "kh",
2704                        "l", "l", "l", "l", "l", "l", "l",
2705                        "m", "m", "m", "m", "m", "m",
2706                        "n", "n", "n", "n", "n", "n", "n", "n", "n", "n",
2707                        "nt", "nt", "nj",
2708                        "ng", "ng", "ng", "ng",
2709                        "ngk","ngg",
2710                        "ny", "ny",
2711                        "p", "p", "p", "p", "p",
2712                        "r", "r", "r", "r", "r", "r", "r", "r",
2713                        "rb", "rd", "rg", "rk", "rs", "rt", "rn", "rn",
2714                        "s", "s", "s", "s", "s", "s",
2715                        "sh", "sh",
2716                        "t", "t", "t", "t", "t", "t",
2717                        "w",
2718                        "y",
2719                        "z",
2720                },
2721                new String[]{
2722                        "k", "k", "k", "k", "k", "k", "t", "t", "t", "n", "n", "n", "n", "n", "n", "n", "n",
2723                        "ng", "ng", "ng", "m", "m", "m", "s", "s", "l", "l", "l", "l", "l", "h", "h"
2724                },
2725                new String[]{"uk", "uk", "ok", "an", "at", "ul", "ang", "ih", "it", "is", "ung", "un", "ah"
2726                },
2727                new String[]{}, new int[]{1, 2, 3}, new double[]{5, 3, 2}, 0.2, 0.25, 0.0, 0.2, genericSanityChecks, true);
2728    }
2729
2730    /**
2731     * An approximation of the Malay language or any of its close relatives, such as Indonesian. This differs from Malay
2732     * as it is normally written by using "ch" for what Malay writes as "c" (it is pronounced like the start of "chow"),
2733     * and "sh" for what Malay writes as "sy" (pronounced like the start of "shoe").
2734     * <br>
2735     * Kashanyah satebok bisal bekain akinuk an as, penah lukul...
2736     */
2737    public static final FakeLanguageGen MALAY = malay().register("Malay");
2738    private static FakeLanguageGen celestial(){
2739        return new FakeLanguageGen(
2740                new String[]{
2741                        "a", "a", "a", "a", "a", "a", "a", "e", "e", "e", "i", "i", "i", "i", "i", "o", "o", "o",
2742                        "a", "a", "a", "a", "a", "a", "a", "e", "e", "e", "i", "i", "i", "i", "i", "o", "o", "o",
2743                        "ă", "ă", "ĕ", "ĭ", "ŏ"
2744                },
2745                new String[]{},
2746                new String[]{
2747                        "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j",
2748                        "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j",
2749                        "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j",
2750                        "n", "m", "v", "s", "z", "h", "y", "w", "j",
2751                        "n", "m", "v", "s", "z", "h", "y", "w", "j",
2752                        "n", "m", "s", "h", "y", "j",
2753                        "n", "m", "s", "h", "y", "j",
2754                        "n", "m", "s", "h", "y", "j",
2755                        "h", "h", "h", "h", "h", "h", "h", "h",
2756                        "m", "m", "m", "m", "m", "m",
2757                        "ry", "ly", "by", "dy", "ny", "my", "vy", "by", "dy", "sy", "zy",
2758                        "bl", "br", "dr", "shl", "shr", "hr"
2759                },
2760                new String[]{
2761                        "j", "j", "j",
2762                        "mh", "mb", "md", "mr", "ms", "mz", "mv",
2763                        "nh", "nb", "nd", "nr", "ns", "nz", "nv",
2764                        "zh", "zb", "zd", "zr", "zv",
2765                        "bd", "db", "bm", "bn", "dm", "dn",
2766                        "ry", "ly", "by", "dy", "ny", "my", "vy", "by", "dy", "sy", "zy", "wy", "jy",
2767                        "bl", "br", "dr", "shl", "shr", "hr"
2768                },
2769                new String[]{
2770                        "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z",
2771                        "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh",
2772                        "l", "r", "n", "m", "v", "b", "d", "th",
2773                        "l", "r", "n", "m", "b", "d", "th",
2774                        "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m",
2775                },
2776                new String[]{
2777                        "am", "an", "ar", "av", "em", "el", "ez", "eth", "ev", "es", "im", "id", "in", "oth", "om",
2778                        "ar", "el", "es", "im", "oth",
2779                        "ăyom", "ĕzra", "ĭdniv", "ŏlor", "evyăd", "iyĕr", "abĭl", "onrŏv"
2780                },
2781                new String[]{"'"}, new int[]{1, 2, 3}, new double[]{5, 6, 2}, 0.45, 0.1, 0.04, 0.14, genericSanityChecks, true);
2782    }
2783
2784    /**
2785     * Fantasy language that is meant to sound like it could be spoken by divine or (magical) otherworldly beings.
2786     * Sometimes uses the breve mark (as in {@code ăĕĭŏ}) over vowels and rarely splits vowels with {@code '}.
2787     * Uses very few harsh sounds, and may be easy to confuse with {@link #ELF} (this tends to use much shorter words).
2788     * This happens to sound a little like Hebrew, but since this doesn't have some consonants that are commonly used in
2789     * Hebrew, and because this uses accented vowels that aren't in Hebrew, they should be different enough that this
2790     * language can seem "not of this world."
2791     * <br>
2792     * Emŏl ebin hanzi'ab, isharar omrihrel nevyăd.
2793     */
2794    public static final FakeLanguageGen CELESTIAL = celestial().register("Celestial"); // "Celestial" is the name handed to register()
2795    
2796    private static FakeLanguageGen chinese(){ // supplies the data used to initialize CHINESE_ROMANIZED
2797        return new FakeLanguageGen(
2798                new String[]{ // openingVowels: a, e, i, o, u and "yu", plain and with macron, acute, caron or grave; e is listed half as often
2799                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", "yū", "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", "yú",
2800                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", "yǔ", "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", "yù",
2801                        "a", "e", "i", "o", "u", "a", "i", "o", "u", "yu", "a", "e", "i", "o", "u", "a", "i", "o", "u", "yu",
2802                },
2803                new String[]{ // midVowels: the five-row single-vowel group is repeated 8 times so single vowels outweigh the pairs below
2804                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2805                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2806                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2807                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2808                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2809                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2810                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2811                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2812                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2813                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2814                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2815                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2816                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2817                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2818                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2819                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2820                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2821                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2822                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2823                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2824                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2825                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2826                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2827                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2828                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2829                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2830                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2831                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2832                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2833                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2834                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2835                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2836                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2837                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2838                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2839                        "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū",
2840                        "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú",
2841                        "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ",
2842                        "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù",
2843                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2844
2845                        "a", "e", "i", "o", "u", "a", "i", "o", "u", // two further unaccented rows add weight to plain vowels
2846                        "a", "e", "i", "o", "u", "a", "i", "o", "u",
2847
2848                        "āí", "āó", "āú", "ēá", "īá", "īú", "ōá", "ūá", "ūé", // vowel pairs: first vowel with macron, second with a different mark or none
2849                        "āǐ", "āǒ", "āǔ", "ēǎ", "īǎ", "īǔ", "ōǎ", "ūǎ", "ūě",
2850                        "āì", "āò", "āù", "ēà", "īà", "īù", "ōà", "ūà", "ūè",
2851                        "āi", "āo", "āu", "ēa", "īa", "īu", "ōa", "ūa", "ūe",
2852
2853                        "áī", "áō", "áū", "éā", "íā", "íū", "óā", "úā", "úē", // vowel pairs: first vowel with acute
2854                        "áǐ", "áǒ", "áǔ", "éǎ", "íǎ", "íǔ", "óǎ", "ǔǎ", "ǔě", // NOTE(review): last two start with ǔ, not ú like the rest of this group - typo? confirm before changing
2855                        "áì", "áò", "áù", "éà", "íà", "íù", "óà", "ùà", "ùè", // NOTE(review): last two start with ù rather than ú - same question
2856                        "ái", "áo", "áu", "éa", "ía", "íu", "óa", "ua", "ue", // NOTE(review): last two have no accent on the first vowel - same question
2857
2858                        "ǎī", "ǎō", "ǎū", "ěā", "ǐā", "ǐū", "ǒā", "ǔā", "ǔē", // vowel pairs: first vowel with caron
2859                        "ǎí", "ǎó", "ǎú", "ěá", "ǐá", "ǐú", "ǒá", "ǔá", "ǔé",
2860                        "ǎì", "ǎò", "ǎù", "ěà", "ǐà", "ǐù", "ǒà", "ǔà", "ǔè",
2861                        "ǎi", "ǎo", "ǎu", "ěa", "ǐa", "ǐu", "ǒa", "ǔa", "ǔe",
2862
2863                        "àī", "àō", "àū", "èā", "ìā", "ìū", "òā", "ùā", "ùē", // vowel pairs: first vowel with grave
2864                        "àí", "àó", "àú", "èá", "ìá", "ìú", "òá", "ùá", "ùé",
2865                        "àǐ", "àǒ", "àǔ", "èǎ", "ìǎ", "ìǔ", "òǎ", "ùǎ", "ùě",
2866                        "ài", "ào", "àu", "èa", "ìa", "ìu", "òa", "ùa", "ùe",
2867
2868                        "aī", "aō", "aū", "eā", "iā", "iū", "oā", "uā", "uē", // vowel pairs: first vowel unaccented
2869                        "aí", "aó", "aú", "eá", "iá", "iú", "oá", "uá", "ué",
2870                        "aǐ", "aǒ", "aǔ", "eǎ", "iǎ", "iǔ", "oǎ", "uǎ", "uě",
2871                        "aì", "aò", "aù", "eà", "ià", "iù", "oà", "uà", "uè",
2872
2873                        "yū", "yú", "yū", "yú", "yū", "yú", // "yu" forms (these stand in for ü, per the CHINESE_ROMANIZED Javadoc)
2874                        "yǔ", "yù", "yǔ", "yù", "yǔ", "yù",
2875                        "yu", "yu", "yu", "yu", "yu", "yu",
2876                },
2877                new String[]{ // openingConsonants: full set once, then shrinking subsets so the repeated entries are picked more often
2878                        "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x",
2879                        "zh", "ch", "sh", "r", "z", "ts", "s",
2880
2881                        "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2882                        "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2883                        "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2884                        "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2885                },
2886                new String[]{ // midConsonants: clusters that start with n (listed twice), then the same weighted list as openingConsonants
2887                        "nb", "np", "nf", "nd", "nt", "nl", "ng", "nk", "nj", "nq", "nx", "nzh", "nch", "nsh", "nz", "nts", "ns",
2888                        "nb", "np", "nf", "nd", "nt", "nl", "ng", "nk", "nj", "nq", "nx", "nzh", "nch", "nsh", "nz", "nts", "ns",
2889
2890                        "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "r", "z", "ts", "s",
2891
2892                        "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2893                        "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2894                        "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2895                        "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s",
2896                },
2897                new String[]{ // closingConsonants: only n (7 entries), ng (6 entries) and r (3 entries)
2898                        "n", "n", "n", "n", "n", "n", "n",
2899                        "ng", "ng", "ng", "ng", "ng", "ng",
2900                        "r", "r", "r",
2901                },
2902                new String[]{}, // closingSyllables: none, matching the 0.0 syllableEndFrequency below
2903                new String[]{}, new int[]{1, 2, 3}, new double[]{14, 3, 1}, 0.175, 0.55, 0.0, 0.0, genericSanityChecks, true); // no vowelSplitters; 1-3 syllables weighted 14:3:1; then vowelStart, vowelEnd, vowelSplit, syllableEnd frequencies
2904    }
2905
2906    /**
2907     * An approximation of Hanyu Pinyin, a Romanization technique used for Mandarin Chinese that has been in common use
2908     * since the 1980s. This makes some slight changes so the vulgarity filters this uses can understand how some
2909     * letters sound; Pinyin's letter c becomes ts, and the u with umlaut, ü, is replaced in all cases with yu.
2910     * <br>
2911     * Tuàn tiāzhǎn dér, ǔngínbǔng xōr shàū kán nu tsīn.
2912     */
2913    public static final FakeLanguageGen CHINESE_ROMANIZED = chinese().register("Chinese Romanized"); // "Chinese Romanized" is the name handed to register()
2914
2915    private static FakeLanguageGen cherokee(){ // supplies the data used to initialize CHEROKEE_ROMANIZED
2916        return new FakeLanguageGen(
2917                new String[]{ // openingVowels: a, e, i, o, u, ü six times each, four vowel pairs twice each, then extra "a" and "ah" entries
2918                        "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü",
2919                        "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü",
2920                        "ai", "au", "oa", "oi", "ai", "au", "oa", "oi",
2921                        "a", "a", "a", "a", "a", "a", "a", "a", "a",
2922                        "ah", "ah", "ah", "ah", "ah", "ah", "ah",
2923                },
2924                new String[]{ // midVowels: left empty; the constructor Javadoc says openingVowels are copied into midVowels internally
2925                },
2926                new String[]{ // openingConsonants: full set twice, then shrinking subsets so h, n, s, d, t end up the most frequent
2927                        "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y",
2928                        "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y",
2929                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2930                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2931                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2932                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2933                        "g", "h", "n", "qu", "s", "d", "t",
2934                        "g", "h", "n", "qu", "s", "d", "t",
2935                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2936                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2937                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2938                        },
2939                new String[]{ // midConsonants: the same rows as openingConsonants, followed by s-clusters and th/kh/sh
2940                        "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y",
2941                        "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y",
2942                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2943                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2944                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2945                        "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y",
2946                        "g", "h", "n", "qu", "s", "d", "t",
2947                        "g", "h", "n", "qu", "s", "d", "t",
2948                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2949                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2950                        "h", "n", "s", "d", "t", "h", "n", "s", "d", "t",
2951                        "sn", "sn", "st", "st", "squ", "squ",
2952                        "th", "kh", "sh", "th", "kh", "sh", "th", "kh", "sh",
2953                        "th", "sh", "th", "sh", "th", "sh", "th", "sh",
2954                },
2955                new String[]{ // closingConsonants: "s" is the only option for a final consonant
2956                        "s"
2957                },
2958                new String[]{ // closingSyllables: none, matching the 0.0 syllableEndFrequency below
2959                },
2960                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{4, 7, 6, 2}, 0.3, 0.96, 0.0, 0.0, null, true); // no vowelSplitters; 1-4 syllables weighted 4:7:6:2; vowelEndFrequency is 0.96; NOTE(review): null sits where the other factories pass a sanity-check constant - presumably disables those checks, confirm
2961    }
2962    /**
2963     * A rough imitation of the Cherokee language, using an attempt at romanizing the syllabary the language is often
2964     * written with, using only the parts of the language that are usually written down. Some of the orthography
2965     * rules aren't clear across Internet information about the language, so this really is a "fake" language that will
2966     * be generated, not the real thing at all. The vowel 'ü' is used in place of the 'v' that the normal transliteration
2967     * uses, to help with profanity-checking what this generates; it is pronounced like in the French word "un".
2968     * <br>
2969     * Dah utugü tsahnahsütoi gohü usütahdi asi tsau dah tashi.
2970     */
2971    public static final FakeLanguageGen CHEROKEE_ROMANIZED = cherokee().register("Cherokee Romanized"); // "Cherokee Romanized" is the name handed to register()
2972
2973    private static FakeLanguageGen vietnamese() { // supplies the data used to initialize VIETNAMESE
2974        return new FakeLanguageGen(new String[]{ // openingVowels: a, e, i, o, u, each plain and with six different accent marks, all equally weighted
2975                "a", "à", "á", "â", "ä", "ā", "ă",
2976                "e", "è", "é", "ê", "ë", "ē", "ĕ",
2977                "i", "ì", "í", "î", "ï", "ī", "ĭ",
2978                "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
2979                "u", "ù", "ú", "û", "ü", "ū", "ŭ",
2980        },
2981                new String[]{ // midVowels: three copies of a group with a listed 4 times, o 3 times and e, i, u once; then vowel pairs and y/ye
2982                        "a", "à", "á", "â", "ä", "ā", "ă",
2983                        "a", "à", "á", "â", "ä", "ā", "ă",
2984                        "a", "à", "á", "â", "ä", "ā", "ă",
2985                        "a", "à", "á", "â", "ä", "ā", "ă",
2986                        "e", "è", "é", "ê", "ë", "ē", "ĕ",
2987                        "i", "ì", "í", "î", "ï", "ī", "ĭ",
2988                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
2989                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
2990                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
2991                        "u", "ù", "ú", "û", "ü", "ū", "ŭ",
2992
2993                        "a", "à", "á", "â", "ä", "ā", "ă",
2994                        "a", "à", "á", "â", "ä", "ā", "ă",
2995                        "a", "à", "á", "â", "ä", "ā", "ă",
2996                        "a", "à", "á", "â", "ä", "ā", "ă",
2997                        "e", "è", "é", "ê", "ë", "ē", "ĕ",
2998                        "i", "ì", "í", "î", "ï", "ī", "ĭ",
2999                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3000                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3001                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3002                        "u", "ù", "ú", "û", "ü", "ū", "ŭ",
3003
3004                        "a", "à", "á", "â", "ä", "ā", "ă",
3005                        "a", "à", "á", "â", "ä", "ā", "ă",
3006                        "a", "à", "á", "â", "ä", "ā", "ă",
3007                        "a", "à", "á", "â", "ä", "ā", "ă",
3008                        "e", "è", "é", "ê", "ë", "ē", "ĕ",
3009                        "i", "ì", "í", "î", "ï", "ī", "ĭ",
3010                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3011                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3012                        "o", "ò", "ó", "ô", "ö", "ō", "ŏ",
3013                        "u", "ù", "ú", "û", "ü", "ū", "ŭ",
3014
3015                        "ua", "uà", "uá", "uâ", "uä", "uā", "uă", // vowel pairs; the accent sits on the second vowel in some rows and on the first in others
3016                        "ie", "iè", "ié", "iê", "ië", "iē", "iĕ",
3017                        "ie", "iè", "ié", "iê", "ië", "iē", "iĕ",
3018                        "ie", "ìe", "íe", "îe", "ïe", "īe", "ĭe",
3019                        "iu", "ìu", "íu", "îu", "ïu", "īu", "ĭu",
3020                        "oi", "òi", "ói", "ôi", "öi", "ōi", "ŏi",
3021                        "uo", "ùo", "úo", "ûo", "üo", "ūo", "ŭo",
3022                        "uo", "ùo", "úo", "ûo", "üo", "ūo", "ŭo",
3023
3024                        "y", "y", "y", "y", "y", "y", "y", // y is used as a vowel here and never carries an accent itself
3025                        "ye", "yè", "yé", "yê", "yë", "yē", "yĕ",
3026                },
3027                new String[]{ // openingConsonants: full set once, then shrinking subsets that favor b, c, d, l, n, ng, p
3028                        "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "l", "m", "n", "ng", "nh", "p", "ph", "qu", "r",
3029                        "s", "t", "th", "tr", "v", "x",
3030                        "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3031                        "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3032                        "b", "c", "d", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3033                        "b", "c", "d", "l", "n", "ng", "p", "ph", "th", "tr",
3034                        "b", "c", "d", "l", "n", "ng", "p", "ph", "th", "tr",
3035                        "b", "c", "d", "l", "n", "ng", "p",
3036                        "b", "c", "d", "l", "n", "ng", "p",
3037                        "b", "c", "d", "l", "n", "ng", "p",
3038                }, new String[]{ // midConsonants: close to openingConsonants, but "t" is kept in more of the shorter rows
3039                "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "l", "m", "n", "ng", "nh", "p", "ph", "qu", "r",
3040                "s", "t", "th", "tr", "v", "x",
3041                "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3042                "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3043                "b", "c", "d", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v",
3044                "b", "c", "d", "l", "n", "ng", "p", "ph", "t", "th", "tr",
3045                "b", "c", "d", "l", "n", "ng", "p", "ph", "t", "th", "tr",
3046                "b", "c", "d", "l", "n", "ng", "p", "t",
3047                "b", "c", "d", "l", "n", "ng", "p", "t",
3048                "b", "c", "d", "l", "n", "ng", "p",
3049        },
3050                new String[]{ // closingConsonants: no l, ph, th, tr or v; m, n, ng, nh and c are listed more than once per row in places
3051                        "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "m", "m", "n", "ng", "nh", "p", "ch", "r",
3052                        "s", "t", "x",
3053                        "b", "c", "d", "ð", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c",
3054                        "b", "c", "d", "ð", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c",
3055                        "b", "c", "d", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c",
3056                        "b", "c", "d", "m", "n", "ng", "p", "n", "t", "nh", "ng",
3057                        "b", "c", "d", "m", "n", "ng", "p", "n", "t", "nh", "ng",
3058                        "b", "c", "d", "m", "n", "ng", "p", "t",
3059                        "b", "c", "d", "m", "n", "ng", "p", "t",
3060                        "b", "c", "d", "m", "n", "ng", "p",
3061                }, new String[]{}, new String[]{}, new int[]{1, 2, 3}, new double[]{37.0, 3.0, 1.0}, // no closingSyllables, no vowelSplitters; 1-3 syllables weighted 37:3:1
3062                0.04, 0.4, 0.0, 0.0, genericSanityChecks, true); // vowelStart, vowelEnd, vowelSplit, syllableEnd frequencies, then the two trailing arguments of the longer constructor
3063    }
3064    /**
3065     * A very rough imitation of the Vietnamese language, without using the accurate characters Vietnamese really uses
3066     * but that are rare in fonts. Since so many letters in correct Vietnamese aren't available in most fonts, this
3067     * can't represent most of the accented vowels in the language, but it tries, with 6 accents for each of a, e, i, o,
3068     * and u, though none for y. It also uses 'ð' from Icelandic in place of the correct d with bar. This could also
3069     * maybe be used as an approximation of (badly) Romanized Thai, since Thai normally uses its own script but also has
3070     * many tones (which would be indicated by the accents here).
3071     * <br>
3072     * Bach trich, nŏ ngiukh nga cä tran ngonh...
3073     */
3074    public static final FakeLanguageGen VIETNAMESE = vietnamese().register("Vietnamese"); // "Vietnamese" is the name handed to register()
3075
3076    /**
3077     * An array that stores all the hand-made FakeLanguageGen constants; it does not store randomly-generated languages
3078     * nor does it store modifications or mixes of languages. The order these are stored in is related to the numeric
3079     * codes for languages in the {@link #serializeToString()} output, but neither is dependent on the other if this
3080     * array is changed for some reason (which is not recommended, but not out of the question). If this is modified,
3081     * then it is probably a bad idea to assign null to any elements in registered; special care is taken to avoid null
3082     * elements in its original state, so some code may rely on the items being usable and non-null.
3083     */
3084    public static final FakeLanguageGen[] registered;
3085    public static final String[] registeredNames; // parallel to registered: registeredNames[i] is the registry key of registered[i]
3086    static { // fills both arrays from registry; NOTE(review): presumably must stay below every register(...) constant, since static initializers run in textual order - confirm
3087        // the first item in registry is null so it can be a placeholder for random languages; we want to skip it.
3088        registered = new FakeLanguageGen[registry.size()-1]; // one slot fewer than the registry because entry 0 is skipped
3089        registeredNames = new String[registered.length];
3090        for (int i = 0; i < registered.length; i++) {
3091            registeredNames[i] = registry.keyAt(i+1); // i+1 steps past the placeholder at registry index 0
3092            registered[i] = registry.getAt(i+1);
3093        }
3094    }
3095
3096    /**
3097     * If a FakeLanguageGen is known and is in {@link #registered}, this allows you to look up that FakeLanguageGen by
3098     * name (using a name from {@link #registeredNames}).
3099     * @param name a String name such as "English", "Korean Romanized", or "Russian Authentic"
3100     * @return a FakeLanguageGen corresponding to the given name, or null if none was found
3101     */
3102    public static FakeLanguageGen get(String name)
3103    {
3104        return registry.get(name); // plain lookup by key in the registry; the result is returned unchanged
3105    }
3106    /**
3107     * If a FakeLanguageGen is known and is in {@link #registered}, this allows you to look up that FakeLanguageGen by
3108     * index, from 0 to {@code FakeLanguageGen.registered.length - 1}.
3109     * @param index an int from 0 to {@code FakeLanguageGen.registered.length - 1}; it is passed to the registry unchanged
3110     * @return a FakeLanguageGen corresponding to the given index, or null if none was found
3111     */
3112    public static FakeLanguageGen getAt(int index)
3113    {
3114        return registry.getAt(index); // NOTE(review): registry index 0 is the placeholder the static initializer skips, so registered[i] is getAt(i + 1) - confirm the documented range
3115    }
3116    /**
3117     * If a FakeLanguageGen is known and is in {@link #registered}, this allows you to look up that FakeLanguageGen's
3118     * name by index, from 0 to {@code FakeLanguageGen.registeredNames.length - 1}.
3119     * @param index an int from 0 to {@code FakeLanguageGen.registeredNames.length - 1}; it is passed to the registry unchanged
3120     * @return the String name (registry key) at the given index; presumably null if none was found - TODO confirm in keyAt
3121     */
3122    public static String nameAt(int index)
3123    {
3124        return registry.keyAt(index); // NOTE(review): registry index 0 is the placeholder the static initializer skips, so registeredNames[i] is nameAt(i + 1) - confirm the documented range
3125    }
3126
3127    /**
3128     * FakeLanguageGen constants that are meant to sound like specific real-world languages, and that all use the Latin
3129     * script (like English) with maybe some accents. This is a public array, so callers can overwrite its elements.
3130     */
3131    public static final FakeLanguageGen[] romanizedHumanLanguages = { // 22 entries; only the reference is final, the contents are not protected
3132            ENGLISH, KOREAN_ROMANIZED, SPANISH, SWAHILI, NORSE_SIMPLIFIED, ARABIC_ROMANIZED, HINDI_ROMANIZED, FRENCH,
3133            MAORI, GREEK_ROMANIZED, INUKTITUT, RUSSIAN_ROMANIZED, NAHUATL, JAPANESE_ROMANIZED, MONGOLIAN, SOMALI, CROW, 
3134            ANCIENT_EGYPTIAN, MALAY, CHINESE_ROMANIZED, CHEROKEE_ROMANIZED, VIETNAMESE
3135    };
3136
3137    /**
3138     * Zero-arg constructor for a FakeLanguageGen; produces a FakeLanguageGen equivalent to FakeLanguageGen.ENGLISH .
3139     */
3140    public FakeLanguageGen() {
3141        this( // delegates to the longer constructor; arguments are positional, in the order documented on the 13-argument overload
3142                new String[]{ // openingVowels: single vowels heavily repeated, with "u" rare; one row of vowel pairs at the end
3143                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
3144                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
3145                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
3146                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
3147                        "au", "ai", "ai", "ou", "ea", "ie", "io", "ei",
3148                },
3149                new String[]{"u", "u", "oa", "oo", "oo", "oo", "ee", "ee", "ee", "ee",}, // midVowels: extras that are listed only for the middle of a word
3150                new String[]{ // openingConsonants: singles and clusters, with shrinking subsets repeated to make common letters likelier
3151                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n",
3152                        "p", "pl", "pr", "qu", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "thr", "v", "w", "y", "z",
3153                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n",
3154                        "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y",
3155                        "b", "br", "c", "ch", "d", "dr", "f", "g", "h", "j", "l", "m", "n",
3156                        "p", "r", "s", "sh", "st", "sl", "t", "tr", "th",
3157                        "b", "d", "f", "g", "h", "l", "m", "n",
3158                        "p", "r", "s", "sh", "t", "th",
3159                        "b", "d", "f", "g", "h", "l", "m", "n",
3160                        "p", "r", "s", "sh", "t", "th",
3161                        "r", "s", "t", "l", "n",
3162                        "str", "spr", "spl", "wr", "kn", "kn", "gn",
3163                },
3164                new String[]{"x", "cst", "bs", "ff", "lg", "g", "gs", // midConsonants: mostly doubled letters and clusters for use between vowels
3165                        "ll", "ltr", "mb", "mn", "mm", "ng", "ng", "ngl", "nt", "ns", "nn", "ps", "mbl", "mpr",
3166                        "pp", "ppl", "ppr", "rr", "rr", "rr", "rl", "rtn", "ngr", "ss", "sc", "rst", "tt", "tt", "ts", "ltr", "zz"
3167                },
3168                new String[]{"b", "rb", "bb", "c", "rc", "ld", "d", "ds", "dd", "f", "ff", "lf", "rf", "rg", "gs", "ch", "lch", "rch", "tch", // closingConsonants: word-final consonants and clusters
3169                        "ck", "ck", "lk", "rk", "l", "ll", "lm", "m", "rm", "mp", "n", "nk", "nch", "nd", "ng", "ng", "nt", "ns", "lp", "rp",
3170                        "p", "r", "rn", "rts", "s", "s", "s", "s", "ss", "ss", "st", "ls", "t", "t", "ts", "w", "wn", "x", "ly", "lly", "z",
3171                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "t", "w",
3172                },
3173                new String[]{"ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y", // closingSyllables: vowel-initial word endings; the first row appears three times
3174                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
3175                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
3176                        "ay", "ay", "ey", "oy", "ay", "ay", "ey", "oy",
3177                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
3178                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
3179                        "ily", "ily", "ily", "adly", "owly", "oorly", "ardly", "iedly",
3180                },
3181                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{10, 11, 4, 1}, 0.22, 0.1, 0.0, 0.22, englishSanityChecks, true); // no vowelSplitters; 1-4 syllables weighted 10:11:4:1; then vowelStart, vowelEnd, vowelSplit, syllableEnd frequencies
3182    }
3183
3184    /**
3185     * This is a very complicated constructor! Maybe look at the calls to this to initialize static members of this
3186     * class, LOVECRAFT and GREEK_ROMANIZED. It forwards every argument, in order, to the longer constructor.
3187     *
3188     * @param openingVowels        String array where each element is a vowel or group of vowels that may appear at the start
3189     *                             of a word or in the middle; elements may be repeated to make them more common
3190     * @param midVowels            String array where each element is a vowel or group of vowels that may appear in the
3191     *                             middle of the word; all openingVowels are automatically copied into this internally.
3192     *                             Elements may be repeated to make them more common
3193     * @param openingConsonants    String array where each element is a consonant or consonant cluster that can appear
3194     *                             at the start of a word; elements may be repeated to make them more common
3195     * @param midConsonants        String array where each element is a consonant or consonant cluster that can appear
3196     *                             between vowels; all closingConsonants are automatically copied into this internally.
3197     *                             Elements may be repeated to make them more common
3198     * @param closingConsonants    String array where each element is a consonant or consonant cluster that can appear
3199     *                             at the end of a word; elements may be repeated to make them more common
3200     * @param closingSyllables     String array where each element is a syllable starting with a vowel and ending in
3201     *                             whatever the word should end in; elements may be repeated to make them more common
3202     * @param vowelSplitters       String array where each element is a mark that goes between vowels, so if "-" is in this,
3203     *                             then "a-a" may be possible; elements may be repeated to make them more common
3204     * @param syllableLengths      int array where each element is a possible number of syllables a word can use; closely
3205     *                             tied to syllableFrequencies
3206     * @param syllableFrequencies  double array where each element corresponds to an element in syllableLengths and
3207     *                             represents how often each syllable count should appear relative to other counts; there
3208     *                             is no need to restrict the numbers to add up to any other number
3209     * @param vowelStartFrequency  a double between 0.0 and 1.0 that determines how often words start with vowels;
3210     *                             higher numbers yield more words starting with vowels
3211     * @param vowelEndFrequency    a double between 0.0 and 1.0 that determines how often words end with vowels; higher
3212     *                             numbers yield more words ending in vowels
3213     * @param vowelSplitFrequency  a double between 0.0 and 1.0 that, if vowelSplitters is not empty, determines how
3214     *                             often a vowel will be split into two vowels separated by one of those splitters
3215     * @param syllableEndFrequency a double between 0.0 and 1.0 that determines how often an element of
3216     *                             closingSyllables is used instead of ending normally
3217     */
3218    public FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants,
3219                           String[] midConsonants, String[] closingConsonants, String[] closingSyllables, String[] vowelSplitters,
3220                           int[] syllableLengths, double[] syllableFrequencies, double vowelStartFrequency,
3221                           double vowelEndFrequency, double vowelSplitFrequency, double syllableEndFrequency) {
3222        this(openingVowels, midVowels, openingConsonants, midConsonants, closingConsonants, closingSyllables,
3223                vowelSplitters, syllableLengths, syllableFrequencies, vowelStartFrequency, vowelEndFrequency,
3224                vowelSplitFrequency, syllableEndFrequency, englishSanityChecks, true); // the longer overload additionally receives englishSanityChecks and true
3225    }
3226
3227    /**
3228     * This is a very complicated constructor! Maybe look at the calls to this to initialize static members of this
3229     * class, LOVECRAFT and GREEK_ROMANIZED.
3230     *
3231     * @param openingVowels        String array where each element is a vowel or group of vowels that may appear at the start
3232     *                             of a word or in the middle; elements may be repeated to make them more common
3233     * @param midVowels            String array where each element is a vowel or group of vowels that may appear in the
3234     *                             middle of the word; all openingVowels are automatically copied into this internally.
3235     *                             Elements may be repeated to make them more common
3236     * @param openingConsonants    String array where each element is a consonant or consonant cluster that can appear
3237     *                             at the start of a word; elements may be repeated to make them more common
3238     * @param midConsonants        String array where each element is a consonant or consonant cluster than can appear
3239     *                             between vowels; all closingConsonants are automatically copied into this internally.
3240     *                             Elements may be repeated to make them more common
3241     * @param closingConsonants    String array where each element is a consonant or consonant cluster than can appear
3242     *                             at the end of a word; elements may be repeated to make them more common
3243     * @param closingSyllables     String array where each element is a syllable starting with a vowel and ending in
3244     *                             whatever the word should end in; elements may be repeated to make them more common
3245     * @param vowelSplitters       String array where each element is a mark that goes between vowels, so if "-" is in this,
3246     *                             then "a-a" may be possible; elements may be repeated to make them more common
3247     * @param syllableLengths      int array where each element is a possible number of syllables a word can use; closely
3248     *                             tied to syllableFrequencies
3249     * @param syllableFrequencies  double array where each element corresponds to an element in syllableLengths and
3250     *                             represents how often each syllable count should appear relative to other counts; there
3251     *                             is no need to restrict the numbers to add up to any other number
3252     * @param vowelStartFrequency  a double between 0.0 and 1.0 that determines how often words start with vowels;
3253     *                             higher numbers yield more words starting with vowels
3254     * @param vowelEndFrequency    a double between 0.0 and 1.0 that determines how often words end with vowels; higher
3255     *                             numbers yield more words ending in vowels
3256     * @param vowelSplitFrequency  a double between 0.0 and 1.0 that, if vowelSplitters is not empty, determines how
3257     *                             often a vowel will be split into two vowels separated by one of those splitters
3258     * @param syllableEndFrequency a double between 0.0 and 1.0 that determines how often an element of
3259     *                             closingSyllables is used instead of ending normally
3260     * @param sane                 true to perform sanity checks for pronounce-able sounds to most English speakers, replacing many
3261     *                             words that are impossible to say; slows down generation slightly, irrelevant for non-Latin alphabets
3262     * @param clean                true to perform vulgarity/obscenity checks on the word, replacing it if it is too close to a
3263     *                             common English vulgarity, obscenity, or slur/epithet; slows down generation slightly
3264     */
3265    public FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants,
3266                           String[] midConsonants, String[] closingConsonants, String[] closingSyllables, String[] vowelSplitters,
3267                           int[] syllableLengths, double[] syllableFrequencies, double vowelStartFrequency,
3268                           double vowelEndFrequency, double vowelSplitFrequency, double syllableEndFrequency,
3269                           Pattern[] sane, boolean clean) {
3270        this.openingVowels = openingVowels;
3271        this.midVowels = new String[openingVowels.length + midVowels.length];
3272        System.arraycopy(midVowels, 0, this.midVowels, 0, midVowels.length);
3273        System.arraycopy(openingVowels, 0, this.midVowels, midVowels.length, openingVowels.length);
3274        this.openingConsonants = openingConsonants;
3275        this.midConsonants = new String[midConsonants.length + closingConsonants.length];
3276        System.arraycopy(midConsonants, 0, this.midConsonants, 0, midConsonants.length);
3277        System.arraycopy(closingConsonants, 0, this.midConsonants, midConsonants.length, closingConsonants.length);
3278        this.closingConsonants = closingConsonants;
3279        this.vowelSplitters = vowelSplitters;
3280        this.closingSyllables = closingSyllables;
3281
3282        this.syllableFrequencies = new double[syllableLengths[syllableLengths.length - 1]];
3283        totalSyllableFrequency = 0.0;
3284        for (int i = 0; i < syllableLengths.length; i++) {
3285            totalSyllableFrequency += (this.syllableFrequencies[syllableLengths[i]-1] = syllableFrequencies[i]);
3286        }
3287
3288        if (vowelStartFrequency > 1.0)
3289            this.vowelStartFrequency = 1.0 / vowelStartFrequency;
3290        else
3291            this.vowelStartFrequency = vowelStartFrequency;
3292        if (vowelEndFrequency > 1.0)
3293            this.vowelEndFrequency = 1.0 / vowelEndFrequency;
3294        else
3295            this.vowelEndFrequency = vowelEndFrequency;
3296        if (vowelSplitters.length == 0)
3297            this.vowelSplitFrequency = 0.0;
3298        else if (vowelSplitFrequency > 1.0)
3299            this.vowelSplitFrequency = 1.0 / vowelSplitFrequency;
3300        else
3301            this.vowelSplitFrequency = vowelSplitFrequency;
3302        if (closingSyllables.length == 0)
3303            this.syllableEndFrequency = 0.0;
3304        else if (syllableEndFrequency > 1.0)
3305            this.syllableEndFrequency = 1.0 / syllableEndFrequency;
3306        else
3307            this.syllableEndFrequency = syllableEndFrequency;
3308        this.clean = clean;
3309        sanityChecks = sane;
3310        modifiers = new ArrayList<>(4);
3311    }
3312
3313    private FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants,
3314                            String[] midConsonants, String[] closingConsonants, String[] closingSyllables,
3315                            String[] vowelSplitters, double[] syllableFrequencies,
3316                            double vowelStartFrequency, double vowelEndFrequency, double vowelSplitFrequency,
3317                            double syllableEndFrequency, Pattern[] sanityChecks, boolean clean,
3318                            List<Modifier> modifiers) {
3319        this.openingVowels = copyStrings(openingVowels);
3320        this.midVowels = copyStrings(midVowels);
3321        this.openingConsonants = copyStrings(openingConsonants);
3322        this.midConsonants = copyStrings(midConsonants);
3323        this.closingConsonants = copyStrings(closingConsonants);
3324        this.closingSyllables = copyStrings(closingSyllables);
3325        this.vowelSplitters = copyStrings(vowelSplitters);
3326        this.syllableFrequencies = Arrays.copyOf(syllableFrequencies, syllableFrequencies.length);
3327        this.vowelStartFrequency = vowelStartFrequency;
3328        this.vowelEndFrequency = vowelEndFrequency;
3329        this.vowelSplitFrequency = vowelSplitFrequency;
3330        this.syllableEndFrequency = syllableEndFrequency;
3331        for (int i = 0; i < syllableFrequencies.length; i++) {
3332            totalSyllableFrequency += syllableFrequencies[i];
3333        }
3334        if (sanityChecks == null)
3335            this.sanityChecks = null;
3336        else {
3337            this.sanityChecks = new Pattern[sanityChecks.length];
3338            System.arraycopy(sanityChecks, 0, this.sanityChecks, 0, sanityChecks.length);
3339        }
3340        this.clean = clean;
3341        this.modifiers = new ArrayList<>(modifiers);
3342    }
3343
3344    private static String[] processParts(OrderedMap<String, String> parts, Set<String> missingSounds,
3345                                         Set<String> forbidden, IRNG rng, double repeatSingleChance,
3346                                         int preferredLimit) {
3347        int l, sz = parts.size();
3348        List<String> working = new ArrayList<>(sz * 24);
3349        String pair;
3350        for (int e = 0; e < parts.size(); e++) {
3351            Map.Entry<String, String> sn = parts.entryAt(e);
3352            if (missingSounds.contains(sn.getKey()))
3353                continue;
3354            for (String t : sn.getValue().split(" ")) {
3355                if (forbidden.contains(t))
3356                    continue;
3357                l = t.length();
3358                int num;
3359                char c;
3360                switch (l) {
3361                    case 0:
3362                        break;
3363                    case 1:
3364                        working.add(t);
3365                        working.add(t);
3366                        working.add(t);
3367                        c = t.charAt(0);
3368                        num = 0;
3369                        boolean repeat = true;
3370                        switch (c) {
3371                            case 'w':
3372                                num += 2;
3373                            case 'y':
3374                            case 'h':
3375                                num += 4;
3376                            case 'q':
3377                            case 'x':
3378                                num += 4;
3379                                repeat = false;
3380                                break;
3381                            case 'i':
3382                            case 'u':
3383                                repeat = false;
3384                                num = 13;
3385                                break;
3386                            case 'z':
3387                            case 'v':
3388                                num = 4;
3389                                break;
3390                            case 'j':
3391                                num = 7;
3392                                break;
3393                            default:
3394                                if (e >= preferredLimit)
3395                                    num = 6;
3396                                else
3397                                    num = 13;
3398                        }
3399                        for (int i = 0; i < num * 3; i++) {
3400                            if (rng.nextDouble() < 0.75) {
3401                                working.add(t);
3402                            }
3403                        }
3404
3405                        if (repeat && rng.nextDouble() < repeatSingleChance) {
3406                            pair = t + t;
3407                            if (missingSounds.contains(pair))
3408                                continue;
3409                            working.add(pair);
3410                            working.add(pair);
3411                            working.add(pair);
3412                            if (rng.nextDouble() < 0.7) {
3413                                working.add(pair);
3414                                working.add(pair);
3415                            }
3416                            if (rng.nextDouble() < 0.7) {
3417                                working.add(pair);
3418                            }
3419                        }
3420
3421                        break;
3422                    case 2:
3423                        if (rng.nextDouble() < 0.65) {
3424                            c = t.charAt(1);
3425                            switch (c) {
3426                                case 'z':
3427                                    num = 1;
3428                                    break;
3429                                case 'w':
3430                                    num = 3;
3431                                    break;
3432                                case 'n':
3433                                    num = 4;
3434                                    break;
3435                                default:
3436
3437                                    if (e >= preferredLimit)
3438                                        num = 2;
3439                                    else
3440                                        num = 7;
3441                            }
3442                            working.add(t);
3443                            for (int i = 0; i < num; i++) {
3444                                if (rng.nextDouble() < 0.25) {
3445                                    working.add(t);
3446                                }
3447                            }
3448                        }
3449                        break;
3450                    case 3:
3451                        if (rng.nextDouble() < 0.5) {
3452                            c = t.charAt(0);
3453                            switch (c) {
3454                                case 'z':
3455                                    num = 1;
3456                                    break;
3457                                case 'w':
3458                                    num = 3;
3459                                    break;
3460                                case 'n':
3461                                    num = 4;
3462                                    break;
3463                                default:
3464                                    if (e >= preferredLimit)
3465                                        num = 2;
3466                                    else
3467                                        num = 6;
3468                            }
3469                            working.add(t);
3470                            for (int i = 0; i < num; i++) {
3471                                if (rng.nextDouble() < 0.2) {
3472                                    working.add(t);
3473                                }
3474                            }
3475                        }
3476                        break;
3477                    default:
3478                        if (rng.nextDouble() < 0.3 && (t.charAt(l - 1) != 'z' || rng.nextDouble() < 0.1)) {
3479                            working.add(t);
3480                        }
3481                        break;
3482                }
3483            }
3484        }
3485        return working.toArray(new String[0]);
3486    }
3487
3488    /*private static final String[][] openVowels = new String[][]{
3489            new String[]{"a", "a", "a", "a", "aa", "ae", "ai", "au", "ea", "ia", "oa", "ua",},
3490            new String[]{"e", "e", "e", "e", "ae", "ea", "ee", "ei", "eo", "eu", "ie", "ue",},
3491            new String[]{"i", "i", "i", "i", "ai", "ei", "ia", "ie", "io", "iu", "oi", "ui",},
3492            new String[]{"o", "o", "o", "o", "eo", "io", "oa", "oi", "oo", "ou",},
3493            new String[]{"u", "u", "u", "u", "au", "eu", "iu", "ou", "ua", "ue", "ui",},
3494    };
3495*/
3496
3497    public static FakeLanguageGen randomLanguage(IRNG rng) {
3498        return randomLanguage(rng.nextLong());
3499    }
3500
3501    public static FakeLanguageGen randomLanguage(long seed) {
3502        GWTRNG rng = new GWTRNG(seed);
3503        int[] lengths = new int[rng.between(3, 5)];
3504        System.arraycopy(new int[]{1, 2, 3, 4}, 0, lengths, 0, lengths.length);
3505        double[] chances = new double[lengths.length];
3506        System.arraycopy(new double[]{
3507                5 + rng.nextDouble(4), 13 + rng.nextDouble(9), 3 + rng.nextDouble(3), 1 + rng.nextDouble(2)
3508        }, 0, chances, 0, chances.length);
3509        double vowelHeavy = rng.between(0.2, 0.5), removalRate = rng.between(0.15, 0.65);
3510        int sz = openCons.size();
3511        int[] reordering = rng.randomOrdering(sz), vOrd = rng.randomOrdering(openVowels.size());
3512        OrderedMap<String, String>
3513                parts0 = new OrderedMap<>(openVowels),
3514                parts1 = new OrderedMap<>(openCons),
3515                parts2 = new OrderedMap<>(midCons),
3516                parts3 = new OrderedMap<>(closeCons);
3517        OrderedSet<String> forbidden = new OrderedSet<>(1024, 0.25f), missingSounds = new OrderedSet<>(64, 0.875f);
3518        parts1.reorder(reordering);
3519        parts2.reorder(reordering);
3520        parts3.reorder(reordering);
3521        parts0.reorder(vOrd);
3522        int n;
3523
3524        int mn = Math.min(rng.nextInt(3), rng.nextInt(3)), sz0, p0s;
3525
3526        for (n = 0; n < mn; n++) {
3527            missingSounds.add(parts0.keyAt(0));
3528            Collections.addAll(forbidden, parts0.getAt(0).split(" "));
3529            parts0.removeFirst();
3530        }
3531        p0s = parts0.size();
3532        sz0 = Math.max(rng.between(1, p0s + 1), rng.between(1, p0s + 1));
3533        char[] nextAccents = new char[sz0], unaccented = new char[sz0];
3534        int vowelAccent = rng.between(1, 7);
3535        for (int i = 0; i < sz0; i++) {
3536            nextAccents[i] = accentedVowels[vOrd[i + mn]][vowelAccent];
3537            unaccented[i] = accentedVowels[vOrd[i + mn]][0];
3538        }
3539        if (rng.nextDouble() < 0.8) {
3540            for (int i = 0; i < sz0; i++) {
3541                char ac = nextAccents[i], ua = unaccented[i];
3542                String v = "", uas = String.valueOf(ua);
3543                Pattern pat = Pattern.compile("\\b([aeiou]*)(" + ua + ")([aeiou]*)\\b");
3544                Replacer rep = pat.replacer("$1$2$3 $1" + ac + "$3"), repLess = pat.replacer("$1" + ac + "$3");
3545                for (int j = 0; j < p0s; j++) {
3546                    String k = parts0.keyAt(j);
3547                    if (uas.equals(k)) // uas is never null, always length 1
3548                        v = parts0.getAt(j);
3549                    else {
3550                        String current = parts0.getAt(j);
3551                        String[] splits = current.split(" ");
3552                        for (int s = 0; s < splits.length; s++) {
3553                            if (forbidden.contains(uas) && splits[s].contains(uas))
3554                                forbidden.add(splits[s].replace(ua, ac));
3555                        }
3556                        parts0.put(k, rep.replace(current));
3557                    }
3558                }
3559                parts0.put(String.valueOf(ac), repLess.replace(v));
3560            }
3561        }
3562
3563        n = 0;
3564        if (rng.nextDouble() < 0.75) {
3565            missingSounds.add("z");
3566            Collections.addAll(forbidden, parts1.get("z").split(" "));
3567            Collections.addAll(forbidden, parts2.get("z").split(" "));
3568            Collections.addAll(forbidden, parts3.get("z").split(" "));
3569            n++;
3570        }
3571        if (rng.nextDouble() < 0.82) {
3572            missingSounds.add("x");
3573            Collections.addAll(forbidden, parts1.get("x").split(" "));
3574            Collections.addAll(forbidden, parts2.get("x").split(" "));
3575            Collections.addAll(forbidden, parts3.get("x").split(" "));
3576            n++;
3577        }
3578        if (rng.nextDouble() < 0.92) {
3579            missingSounds.add("qu");
3580            Collections.addAll(forbidden, parts1.get("qu").split(" "));
3581            Collections.addAll(forbidden, parts2.get("qu").split(" "));
3582            Collections.addAll(forbidden, parts3.get("qu").split(" "));
3583            n++;
3584        }
3585        if (rng.nextDouble() < 0.96) {
3586            missingSounds.add("q");
3587            Collections.addAll(forbidden, parts1.get("q").split(" "));
3588            Collections.addAll(forbidden, parts2.get("q").split(" "));
3589            Collections.addAll(forbidden, parts3.get("q").split(" "));
3590            n++;
3591        }
3592        if (rng.nextDouble() < 0.97) {
3593            missingSounds.add("tl");
3594            Collections.addAll(forbidden, parts1.get("tl").split(" "));
3595            Collections.addAll(forbidden, parts2.get("tl").split(" "));
3596            Collections.addAll(forbidden, parts3.get("tl").split(" "));
3597            n++;
3598        }
3599        if (rng.nextDouble() < 0.86) {
3600            missingSounds.add("ph");
3601            Collections.addAll(forbidden, parts1.get("ph").split(" "));
3602            Collections.addAll(forbidden, parts2.get("ph").split(" "));
3603            Collections.addAll(forbidden, parts3.get("ph").split(" "));
3604            n++;
3605        }
3606        if (rng.nextDouble() < 0.94) {
3607            missingSounds.add("kh");
3608            Collections.addAll(forbidden, parts1.get("kh").split(" "));
3609            Collections.addAll(forbidden, parts2.get("kh").split(" "));
3610            Collections.addAll(forbidden, parts3.get("kh").split(" "));
3611            n++;
3612        }
3613        if (rng.nextDouble() < 0.96) {
3614            missingSounds.add("bh");
3615            missingSounds.add("dh");
3616            Collections.addAll(forbidden, parts1.get("bh").split(" "));
3617            Collections.addAll(forbidden, parts2.get("bh").split(" "));
3618            Collections.addAll(forbidden, parts3.get("bh").split(" "));
3619            Collections.addAll(forbidden, parts1.get("dh").split(" "));
3620            Collections.addAll(forbidden, parts2.get("dh").split(" "));
3621            Collections.addAll(forbidden, parts3.get("dh").split(" "));
3622            n++;
3623            n++;
3624        }
3625
3626        for (; n < sz * removalRate; n++) {
3627            missingSounds.add(parts1.keyAt(n));
3628            missingSounds.add(parts2.keyAt(n));
3629            missingSounds.add(parts3.keyAt(n));
3630            Collections.addAll(forbidden, parts1.getAt(n).split(" "));
3631            Collections.addAll(forbidden, parts2.getAt(n).split(" "));
3632            Collections.addAll(forbidden, parts3.getAt(n).split(" "));
3633        }
3634
3635        return new FakeLanguageGen(
3636                processParts(parts0, missingSounds, forbidden, rng, 0.0, p0s),
3637                new String[]{},
3638                processParts(openCons, missingSounds, forbidden, rng, 0.0, 4096),
3639                processParts(midCons, missingSounds, forbidden, rng, (rng.nextDouble() * 3 - 0.75) * 0.4444, 4096),
3640                processParts(closeCons, missingSounds, forbidden, rng, (rng.nextDouble() * 3 - 0.75) * 0.2857, 4096),
3641                new String[]{},
3642                new String[]{}, lengths, chances, vowelHeavy, vowelHeavy * 1.8, 0.0, 0.0, genericSanityChecks, true).summarize("0#" + seed + "@1");
3643    }
3644
3645    protected static boolean checkAll(CharSequence testing, Pattern[] checks) {
3646        CharSequence fixed = removeAccents(testing);
3647        for (int i = 0; i < checks.length; i++) {
3648            if (checks[i].matcher(fixed).find())
3649                return false;
3650        }
3651        return true;
3652    }
3653
3654    /**
3655     * Checks a CharSequence, such as a String, against an overzealous vulgarity filter, returning true if the text
3656     * could contain vulgar elements or words that could seem vulgar or juvenile. The idea here is that false positives
3657     * are OK as long as there are very few false negatives (missed vulgar words). Does not check punctuation or numbers
3658     * that could look like letters.
3659     * @param testing the text, as a CharSequence such as a String, to check
3660     * @return true if the text could contain a vulgar or juvenile element; false if it probably doesn't
3661     */
3662    public static boolean checkVulgarity(CharSequence testing)
3663    {
3664        CharSequence fixed = removeAccents(testing);
3665        for (int i = 0; i < vulgarChecks.length; i++) {
3666            if (vulgarChecks[i].matcher(fixed).find())
3667            {
3668//                System.out.println(vulgarChecks[i]);
3669                return true;
3670            }
3671        }
3672        return false;
3673    }
3674
3675    /**
3676     * Generate a word from this FakeLanguageGen, using and changing the current seed.
3677     *
3678     * @param capitalize true if the word should start with a capital letter, false otherwise
3679     * @return a word in the fake language as a String
3680     */
3681    public String word(boolean capitalize) {
3682        return word(srng, capitalize);
3683    }
3684
3685    /**
3686     * Generate a word from this FakeLanguageGen using the specified long seed to use for a shared StatefulRNG.
3687     * If seed is the same, a FakeLanguageGen should produce the same word every time with this method.
3688     *
3689     * @param seed       the seed, as a long, to use for the randomized string building
3690     * @param capitalize true if the word should start with a capital letter, false otherwise
3691     * @return a word in the fake language as a String
3692     */
3693    public String word(long seed, boolean capitalize) {
3694        srng.setState(seed);
3695        return word(srng, capitalize);
3696    }
3697
3698    /**
3699     * Generate a word from this FakeLanguageGen using the specified RNG.
3700     *
3701     * @param rng        the RNG to use for the randomized string building
3702     * @param capitalize true if the word should start with a capital letter, false otherwise
3703     * @return a word in the fake language as a String
3704     */
3705    public String word(IRNG rng, boolean capitalize) {
3706        while (true) {
3707            sb.setLength(0);
3708            ender.setLength(0);
3709
3710            double syllableChance = rng.nextDouble(totalSyllableFrequency);
3711            int syllables = 1, i = 0;
3712            for (int s = 0; s < syllableFrequencies.length; s++) {
3713                if(syllableChance < syllableFrequencies[s])
3714                {
3715                    syllables = s + 1;
3716                    break;
3717                } else
3718                {
3719                    syllableChance -= syllableFrequencies[s];
3720                }
3721            }
3722            if (rng.nextDouble() < vowelStartFrequency) {
3723                sb.append(rng.getRandomElement(openingVowels));
3724                if (syllables == 1)
3725                    sb.append(rng.getRandomElement(closingConsonants));
3726                else
3727                    sb.append(rng.getRandomElement(midConsonants));
3728                i++;
3729            } else {
3730                sb.append(rng.getRandomElement(openingConsonants));
3731            }
3732            String close = "";
3733            boolean redouble = false;
3734            if (i < syllables) {
3735                if (rng.nextDouble() < syllableEndFrequency) {
3736                    close = rng.getRandomElement(closingSyllables);
3737                    if (close.contains("@") && (syllables & 1) == 0) {
3738                        redouble = true;
3739                        syllables >>= 1;
3740
3741                        //sb.append(close.replaceAll("@\\d", sb.toString()));
3742                    }
3743                    if (!close.contains("@"))
3744                        ender.append(close);
3745                    else if (rng.nextDouble() < vowelEndFrequency) {
3746                        ender.append(rng.getRandomElement(midVowels));
3747                        if (rng.nextDouble() < vowelSplitFrequency) {
3748                            ender.append(rng.getRandomElement(vowelSplitters))
3749                                    .append(rng.getRandomElement(midVowels));
3750                        }
3751                    }
3752                } else {
3753                    ender.append(rng.getRandomElement(midVowels));
3754                    if (rng.nextDouble() < vowelSplitFrequency) {
3755                        ender.append(rng.getRandomElement(vowelSplitters))
3756                                .append(rng.getRandomElement(midVowels));
3757                    }
3758                    if (rng.nextDouble() >= vowelEndFrequency) {
3759                        ender.append(rng.getRandomElement(closingConsonants));
3760                        if (rng.nextDouble() < syllableEndFrequency) {
3761                            close = rng.getRandomElement(closingSyllables);
3762                            if (close.contains("@") && (syllables & 1) == 0) {
3763                                redouble = true;
3764                                syllables >>= 1;
3765
3766                                //sb.append(close.replaceAll("@\\d", sb.toString()));
3767                            }
3768                            if (!close.contains("@"))
3769                                ender.append(close);
3770                        }
3771                    }
3772                }
3773                i += vowelClusters.matcher(ender).findAll().count();
3774
3775            }
3776
3777            for (; i < syllables; i++) {
3778                sb.append(rng.getRandomElement(midVowels));
3779                if (rng.nextDouble() < vowelSplitFrequency) {
3780                    sb.append(rng.getRandomElement(vowelSplitters))
3781                            .append(rng.getRandomElement(midVowels));
3782                }
3783                sb.append(rng.getRandomElement(midConsonants));
3784            }
3785
3786            sb.append(ender);
3787            if (redouble && i <= syllables + 1) {
3788                sb.append(close.replaceAll("@", sb.toString()));
3789            }
3790
3791            if (sanityChecks != null && !checkAll(sb, sanityChecks))
3792            {
3793                continue;
3794            }
3795
3796            for (int m = 0; m < modifiers.size(); m++) {
3797                modifiers.get(m).modify(rng, sb);
3798            }
3799
3800            if (capitalize)
3801                sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
3802
3803            if (clean && !checkAll(sb, vulgarChecks))
3804            {
3805                continue;
3806            }
3807            return sb.toString();
3808        }
3809    }
3810
3811    /**
3812     * Generate a word from this FakeLanguageGen with an approximate number of syllables using the specified long seed 
3813     * to use for a shared StatefulRNG.
3814     * If seed and the other parameters are the same, a FakeLanguageGen should produce the same word every time with
3815     * this method.
3816     *
3817     * @param seed       the seed, as a long, to use for the randomized string building
3818     * @param capitalize true if the word should start with a capital letter, false otherwise
3819     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
3820     * @return a word in the fake language as a String
3821     */
3822    public String word(long seed, boolean capitalize, int approxSyllables) {
3823        srng.setState(seed);
3824        return word(srng, capitalize, approxSyllables);
3825    }
3826
3827    /**
3828     * Generate a word from this FakeLanguageGen using the specified RNG with an approximate number of syllables.
3829     *
3830     * @param rng        the RNG to use for the randomized string building
3831     * @param capitalize true if the word should start with a capital letter, false otherwise
3832     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
3833     * @return a word in the fake language as a String
3834     */
3835    public String word(IRNG rng, boolean capitalize, int approxSyllables) {
3836        return word(rng, capitalize, approxSyllables, null);
3837    }
3838    /**
3839     * Generate a word from this FakeLanguageGen with an approximate number of syllables using the specified long seed 
3840     * to use for a shared StatefulRNG. This takes an array of {@link Pattern} objects (from RegExodus, not
3841     * java.util.regex) that should match invalid outputs, such as words that shouldn't be generated in some context due
3842     * to vulgarity or cultural matters. If seed and the other parameters are the same, a FakeLanguageGen should produce
3843     * the same word every time with this method.
3844     *
3845     * @param seed       the seed, as a long, to use for the randomized string building
3846     * @param capitalize true if the word should start with a capital letter, false otherwise
3847     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
3848     * @param additionalChecks an array of RegExodus Pattern objects that match invalid words (these may be additional vulgarity checks, for example)
3849     * @return a word in the fake language as a String
3850     */
3851    public String word(long seed, boolean capitalize, int approxSyllables, Pattern[] additionalChecks) {
3852        srng.setState(seed);
3853        return word(srng, capitalize, approxSyllables, additionalChecks);
3854    }
3855
3856    /**
3857     * Generate a word from this FakeLanguageGen using the specified RNG with an approximate number of syllables.
3858     * This takes an array of {@link Pattern} objects (from RegExodus, not java.util.regex) that should match invalid
3859     * outputs, such as words that shouldn't be generated in some context due to vulgarity or cultural matters.
3860     *
3861     * @param rng        the RNG to use for the randomized string building
3862     * @param capitalize true if the word should start with a capital letter, false otherwise
3863     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
3864     * @param additionalChecks an array of RegExodus Pattern objects that match invalid words (these may be additional vulgarity checks, for example)
3865     * @return a word in the fake language as a String
3866     */
3867    public String word(IRNG rng, boolean capitalize, int approxSyllables, Pattern[] additionalChecks) {
3868        if (approxSyllables <= 0) {
3869            sb.setLength(0);
3870            sb.append(rng.getRandomElement(openingVowels));
3871            for (int m = 0; m < modifiers.size(); m++) {
3872                modifiers.get(m).modify(rng, sb);
3873            }
3874            if (capitalize) sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
3875            return sb.toString();
3876        }
3877        while (true) {
3878            sb.setLength(0);
3879            ender.setLength(0);
3880            int i = 0;
3881            if (rng.nextDouble() < vowelStartFrequency) {
3882                sb.append(rng.getRandomElement(openingVowels));
3883                if (approxSyllables == 1 && closingConsonants.length > 0)
3884                    sb.append(rng.getRandomElement(closingConsonants));
3885                else if (midConsonants.length > 0)
3886                    sb.append(rng.getRandomElement(midConsonants));
3887                i++;
3888            } else if (openingConsonants.length > 0) {
3889                sb.append(rng.getRandomElement(openingConsonants));
3890            }
3891            String close = "";
3892            boolean redouble = false;
3893            if (i < approxSyllables) {
3894                if (closingSyllables.length > 0 && rng.nextDouble() < syllableEndFrequency) {
3895                    close = rng.getRandomElement(closingSyllables);
3896                    if (close.contains("@") && (approxSyllables & 1) == 0) {
3897                        redouble = true;
3898                        approxSyllables = approxSyllables >> 1;
3899
3900                        //sb.append(close.replaceAll("@\\d", sb.toString()));
3901                    }
3902                    if (!close.contains("@"))
3903                        ender.append(close);
3904                    else if (redouble && rng.nextDouble() < vowelEndFrequency) {
3905                        ender.append(rng.getRandomElement(midVowels));
3906                        if (vowelSplitters.length > 0 && rng.nextDouble() < vowelSplitFrequency) {
3907                            ender.append(rng.getRandomElement(vowelSplitters))
3908                                    .append(rng.getRandomElement(midVowels));
3909                        }
3910                    }
3911                } else {
3912                    ender.append(rng.getRandomElement(midVowels));
3913                    if (rng.nextDouble() < vowelSplitFrequency) {
3914                        ender.append(rng.getRandomElement(vowelSplitters))
3915                                .append(rng.getRandomElement(midVowels));
3916                    }
3917                    if (rng.nextDouble() >= vowelEndFrequency) {
3918                        ender.append(rng.getRandomElement(closingConsonants));
3919                        if (rng.nextDouble() < syllableEndFrequency) {
3920                            close = rng.getRandomElement(closingSyllables);
3921                            if (close.contains("@") && (approxSyllables & 1) == 0) {
3922                                redouble = true;
3923                                approxSyllables = approxSyllables >> 1;
3924
3925                                //sb.append(close.replaceAll("@\\d", sb.toString()));
3926                            }
3927                            if (!close.contains("@"))
3928                                ender.append(close);
3929                        }
3930                    }
3931                }
3932                i += vowelClusters.matcher(ender).findAll().count();
3933            }
3934
3935            for (; i < approxSyllables; i++) {
3936                sb.append(rng.getRandomElement(midVowels));
3937                if (rng.nextDouble() < vowelSplitFrequency) {
3938                    sb.append(rng.getRandomElement(vowelSplitters))
3939                            .append(rng.getRandomElement(midVowels));
3940                }
3941                sb.append(rng.getRandomElement(midConsonants));
3942            }
3943
3944            sb.append(ender);
3945            if (redouble && i <= approxSyllables + 1) {
3946                sb.append(close.replaceAll("@", sb.toString()));
3947            }
3948
3949            if (sanityChecks != null && !checkAll(sb, sanityChecks))
3950                continue;
3951
3952            for (int m = 0; m < modifiers.size(); m++) {
3953                modifiers.get(m).modify(rng, sb);
3954            }
3955
3956            if (clean && !checkAll(sb, vulgarChecks))
3957                continue;
3958
3959            if (additionalChecks != null && !checkAll(sb, additionalChecks))
3960                continue;
3961
3962            if (capitalize)
3963                sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
3964
3965            return sb.toString();
3966        }
3967    }
3968
3969    /**
3970     * Generate a word from this FakeLanguageGen using the specified StatefulRNG with an approximate number of
3971     * syllables, potentially setting the state of rng mid-way through the word to another seed from {@code reseeds}
3972     * more than once if the word is long enough. This overload is less likely to be used very often.
3973     *
3974     * @param rng        the StatefulRNG to use for the randomized string building
3975     * @param capitalize true if the word should start with a capital letter, false otherwise
3976     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
3977     * @param reseeds an array or varargs of additional long seeds to seed {@code rng} with mid-generation 
3978     * @return a word in the fake language as a String
3979     */
3980    public String word(IStatefulRNG rng, boolean capitalize, int approxSyllables, long... reseeds) {
3981        if (approxSyllables <= 0) {
3982            sb.setLength(0);
3983            sb.append(rng.getRandomElement(openingVowels));
3984            for (int m = 0; m < modifiers.size(); m++) {
3985                modifiers.get(m).modify(rng, sb);
3986            }
3987            if (capitalize) sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
3988            return sb.toString();
3989        }
3990        int numSeeds, fraction = 1;
3991        if (reseeds != null)
3992            numSeeds = Math.min(reseeds.length, approxSyllables - 1);
3993        else numSeeds = 0;
3994        while (true) {
3995            sb.setLength(0);
3996            ender.setLength(0);
3997            int i = 0;
3998            if (rng.nextDouble() < vowelStartFrequency) {
3999                sb.append(rng.getRandomElement(openingVowels));
4000                if (approxSyllables == 1)
4001                    sb.append(rng.getRandomElement(closingConsonants));
4002                else
4003                    sb.append(rng.getRandomElement(midConsonants));
4004                i++;
4005            } else {
4006                sb.append(rng.getRandomElement(openingConsonants));
4007            }
4008            String close = "";
4009            boolean redouble = false;
4010            if (i < approxSyllables) {
4011                if (numSeeds > 0 && i > 0 && i == approxSyllables * fraction / (1 + numSeeds))
4012                    rng.setState(reseeds[fraction++ - 1]);
4013                if (rng.nextDouble() < syllableEndFrequency) {
4014                    close = rng.getRandomElement(closingSyllables);
4015                    if (close.contains("@") && (approxSyllables & 1) == 0) {
4016                        redouble = true;
4017                        approxSyllables = approxSyllables >> 1;
4018                    }
4019                    if (!close.contains("@"))
4020                        ender.append(close);
4021                    else if (rng.nextDouble() < vowelEndFrequency) {
4022                        ender.append(rng.getRandomElement(midVowels));
4023                        if (rng.nextDouble() < vowelSplitFrequency) {
4024                            ender.append(rng.getRandomElement(vowelSplitters))
4025                                    .append(rng.getRandomElement(midVowels));
4026                        }
4027                    }
4028                } else {
4029                    ender.append(rng.getRandomElement(midVowels));
4030                    if (rng.nextDouble() < vowelSplitFrequency) {
4031                        ender.append(rng.getRandomElement(vowelSplitters))
4032                                .append(rng.getRandomElement(midVowels));
4033                    }
4034                    if (rng.nextDouble() >= vowelEndFrequency) {
4035                        ender.append(rng.getRandomElement(closingConsonants));
4036                        if (rng.nextDouble() < syllableEndFrequency) {
4037                            close = rng.getRandomElement(closingSyllables);
4038                            if (close.contains("@") && (approxSyllables & 1) == 0) {
4039                                redouble = true;
4040                                approxSyllables = approxSyllables >> 1;
4041
4042                                //sb.append(close.replaceAll("@\\d", sb.toString()));
4043                            }
4044                            if (!close.contains("@"))
4045                                ender.append(close);
4046                        }
4047                    }
4048                }
4049                i += vowelClusters.matcher(ender).findAll().count();
4050            }
4051
4052            for (; i < approxSyllables; i++) {
4053                if (numSeeds > 0 && i > 0 && i == approxSyllables * fraction / (1 + numSeeds))
4054                    rng.setState(reseeds[fraction++ - 1]);
4055                sb.append(rng.getRandomElement(midVowels));
4056                if (rng.nextDouble() < vowelSplitFrequency) {
4057                    sb.append(rng.getRandomElement(vowelSplitters))
4058                            .append(rng.getRandomElement(midVowels));
4059                }
4060                sb.append(rng.getRandomElement(midConsonants));
4061            }
4062
4063            sb.append(ender);
4064            if (redouble && i <= approxSyllables + 1) {
4065                sb.append(close.replaceAll("@", sb.toString()));
4066            }
4067
4068            if (sanityChecks != null && !checkAll(sb, sanityChecks))
4069                continue;
4070
4071            for (int m = 0; m < modifiers.size(); m++) {
4072                modifiers.get(m).modify(rng, sb);
4073            }
4074
4075            if (capitalize)
4076                sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
4077
4078            if (clean && !checkAll(sb, vulgarChecks))
4079                continue;
4080            return sb.toString();
4081        }
4082    }
4083
4084    private static final String[] mid = {",", ",", ",", ";"}, end = {".", ".", ".", "!", "?", "..."};
4085
4086    /**
4087     * Generate a sentence from this FakeLanguageGen, using and changing the current seed, with the length in words
4088     * between minWords and maxWords, both inclusive. This can use commas and semicolons between words, and can end a
4089     * sentence with ".", "!", "?", or "...".
4090     *
4091     * @param minWords an int for the minimum number of words in a sentence; should be at least 1
4092     * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords
4093     * @return a sentence in the fake language as a String
4094     */
4095    public String sentence(int minWords, int maxWords) {
4096        return sentence(srng, minWords, maxWords, mid, end, 0.2);
4097    }
4098
4099    /**
4100     * Generate a sentence from this FakeLanguageGen, using the given seed as a long, with the length in words between
4101     * minWords and maxWords, both inclusive. This can use commas and semicolons between words, and can end a
4102     * sentence with ".", "!", "?", or "...".
4103     *
4104     * @param seed     the seed, as a long, for the randomized string building
4105     * @param minWords an int for the minimum number of words in a sentence; should be at least 1
4106     * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords
4107     * @return a sentence in the fake language as a String
4108     */
4109    public String sentence(long seed, int minWords, int maxWords) {
4110        srng.setState(seed);
4111        return sentence(srng, minWords, maxWords);
4112    }
4113
4114    /**
4115     * Generate a sentence from this FakeLanguageGen, using the given RNG, with the length in words between minWords and
4116     * maxWords, both inclusive. This can use commas and semicolons between words, and can end a
4117     * sentence with ".", "!", "?", or "...".
4118     * 
4119     * @param rng      the RNG to use for the randomized string building
4120     * @param minWords an int for the minimum number of words in a sentence; should be at least 1
4121     * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords
4122     * @return a sentence in the fake language as a String
4123     */
4124    public String sentence(IRNG rng, int minWords, int maxWords) {
4125        return sentence(rng, minWords, maxWords, mid,
4126                end, 0.2);
4127    }
4128
4129    /**
4130     * Generate a sentence from this FakeLanguageGen, using and changing the current seed. The sentence's length in
4131     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
4132     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
4133     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
4134     *
4135     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4136     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4137     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4138     *                                space in the middle of a sentence
4139     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4140     *                                the very end of a sentence
4141     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4142     *                                midPunctuation should be inserted before spaces
4143     * @return a sentence in the fake language as a String
4144     */
4145    public String sentence(int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4146                           double midPunctuationFrequency) {
4147        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency);
4148    }
4149    /**
4150     * Generate a sentence from this FakeLanguageGen, using the given seed as a long. The sentence's length in
4151     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
4152     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
4153     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
4154     *
4155     * @param seed                    the seed, as a long, for the randomized string building
4156     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4157     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4158     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4159     *                                space in the middle of a sentence
4160     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4161     *                                the very end of a sentence
4162     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4163     *                                midPunctuation should be inserted before spaces
4164     * @return a sentence in the fake language as a String
4165     */
4166    public String sentence(long seed, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4167                           double midPunctuationFrequency) {
4168        srng.setState(seed);
4169        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency);
4170    }
4171
4172    /**
4173     * Generate a sentence from this FakeLanguageGen using the specific RNG. The sentence's length in
4174     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
4175     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
4176     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
4177     *
4178     * @param rng                     the RNG to use for the randomized string building
4179     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4180     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4181     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4182     *                                space in the middle of a sentence
4183     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4184     *                                the very end of a sentence
4185     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4186     *                                midPunctuation should be inserted before spaces
4187     * @return a sentence in the fake language as a String
4188     */
4189    public String sentence(IRNG rng, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4190                           double midPunctuationFrequency) {
4191        if (minWords < 1)
4192            minWords = 1;
4193        if (minWords > maxWords)
4194            maxWords = minWords;
4195        if (midPunctuationFrequency > 1.0) {
4196            midPunctuationFrequency = 1.0 / midPunctuationFrequency;
4197        }
4198        ssb.setLength(0);
4199        ssb.ensureCapacity(12 * maxWords);
4200        ssb.append(word(rng, true));
4201        for (int i = 1; i < minWords; i++) {
4202            if (rng.nextDouble() < midPunctuationFrequency) {
4203                ssb.append(rng.getRandomElement(midPunctuation));
4204            }
4205            ssb.append(' ').append(word(rng, false));
4206        }
4207        for (int i = minWords; i < maxWords && rng.nextInt(2 * maxWords) > i; i++) {
4208            if (rng.nextDouble() < midPunctuationFrequency) {
4209                ssb.append(rng.getRandomElement(midPunctuation));
4210            }
4211            ssb.append(' ').append(word(rng, false));
4212        }
4213        if (endPunctuation != null && endPunctuation.length > 0)
4214            ssb.append(rng.getRandomElement(endPunctuation));
4215        return ssb.toString();
4216    }
4217
4218    /**
4219     * Generate a sentence from this FakeLanguageGen that fits in the given length limit. The sentence's length in
4220     * words will be between minWords and maxWords, both inclusive, unless it would exceed maxChars, in which case it is
4221     * truncated. It will put one of the punctuation Strings from {@code midPunctuation} between two words (before the
4222     * space) at a frequency of {@code midPunctuationFrequency} (between 0 and 1), and will end the sentence with one
4223     * String chosen from {@code endPunctuation}.
4224     *
4225     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4226     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4227     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4228     *                                space in the middle of a sentence
4229     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4230     *                                the very end of a sentence
4231     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4232     *                                midPunctuation should be inserted before spaces
4233     * @param maxChars                the longest string length this can produce; should be at least {@code 6 * minWords}
4234     * @return a sentence in the fake language as a String
4235     */
4236    public String sentence(int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4237                           double midPunctuationFrequency, int maxChars) {
4238        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency, maxChars);
4239    }
4240
4241    /**
4242     * Generate a sentence from this FakeLanguageGen that fits in the given length limit, using the given seed as a
4243     * long. The sentence's length in words will be between minWords and maxWords, both inclusive, unless it would
4244     * exceed maxChars, in which case it is truncated. It will put one of the punctuation Strings from
4245     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
4246     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
4247     *
4248     * @param seed                    the seed, as a long, for the randomized string building
4249     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4250     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4251     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4252     *                                space in the middle of a sentence
4253     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4254     *                                the very end of a sentence
4255     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4256     *                                midPunctuation should be inserted before spaces
4257     * @param maxChars                the longest string length this can produce; should be at least {@code 6 * minWords}
4258     * @return a sentence in the fake language as a String
4259     */
4260    public String sentence(long seed, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4261                           double midPunctuationFrequency, int maxChars) {
4262        srng.setState(seed);
4263        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency, maxChars);
4264    }
4265
4266    /**
4267     * Generate a sentence from this FakeLanguageGen using the given RNG that fits in the given length limit. The
4268     * sentence's length in words will be between minWords and maxWords, both inclusive, unless it would exceed
4269     * maxChars, in which case it is truncated. It will put one of the punctuation Strings from {@code midPunctuation}
4270     * between two words (before the space) at a frequency of {@code midPunctuationFrequency} (between 0 and 1), and
4271     * will end the sentence with one String chosen from {@code endPunctuation}.
4272     *
4273     * @param rng                     the RNG to use for the randomized string building
4274     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
4275     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
4276     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
4277     *                                space in the middle of a sentence
4278     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
4279     *                                the very end of a sentence
4280     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
4281     *                                midPunctuation should be inserted before spaces
4282     * @param maxChars                the longest string length this can produce; should be at least {@code 6 * minWords}
4283     * @return a sentence in the fake language as a String
4284     */
4285    public String sentence(IRNG rng, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
4286                           double midPunctuationFrequency, int maxChars) {
4287        if(maxChars < 0)
4288            return sentence(rng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency);
4289        if (minWords < 1)
4290            minWords = 1;
4291        if (minWords > maxWords)
4292            maxWords = minWords;
4293        if (midPunctuationFrequency > 1.0) {
4294            midPunctuationFrequency = 1.0 / midPunctuationFrequency;
4295        }
4296        if (maxChars < 4)
4297            return "!";
4298        if (maxChars <= 5 * minWords) {
4299            minWords = 1;
4300            maxWords = 1;
4301        }
4302        int frustration = 0;
4303        ssb.setLength(0); 
4304        ssb.ensureCapacity(maxChars);
4305        String next = word(rng, true);
4306        while (next.length() >= maxChars - 1 && frustration < 50) {
4307            next = word(rng, true);
4308            frustration++;
4309        }
4310        if (frustration >= 50) return "!";
4311        ssb.append(next);
4312        for (int i = 1; i < minWords && ssb.length() < maxChars - 7; i++) {
4313            if (rng.nextDouble() < midPunctuationFrequency && ssb.length() < maxChars - 3) {
4314                ssb.append(rng.getRandomElement(midPunctuation));
4315            }
4316            next = word(rng, false);
4317            while (ssb.length() + next.length() >= maxChars - 2 && frustration < 50) {
4318                next = word(rng, false);
4319                frustration++;
4320            }
4321            if (frustration >= 50) break;
4322            ssb.append(' ').append(next);
4323        }
4324        for (int i = minWords; i < maxWords && ssb.length() < maxChars - 7 && rng.nextInt(2 * maxWords) > i && frustration < 50; i++) {
4325            if (rng.nextDouble() < midPunctuationFrequency && ssb.length() < maxChars - 3) {
4326                ssb.append(rng.getRandomElement(midPunctuation));
4327            }
4328            next = word(rng, false);
4329            while (ssb.length() + next.length() >= maxChars - 2 && frustration < 50) {
4330                next = word(rng, false);
4331                frustration++;
4332            }
4333            if (frustration >= 50) break;
4334            ssb.append(' ');
4335            ssb.append(next);
4336        }
4337
4338        if (endPunctuation != null && endPunctuation.length > 0) {
4339
4340            next = rng.getRandomElement(endPunctuation);
4341            if (ssb.length() + next.length() >= maxChars)
4342                ssb.append('.');
4343            else
4344                ssb.append(next);
4345        }
4346
4347        if (ssb.length() > maxChars)
4348            return "!";
4349        return ssb.toString();
4350    }
4351
4352    protected String[] merge1000(IRNG rng, String[] me, String[] other, double otherInfluence) {
4353        if (other.length <= 0 && me.length <= 0)
4354            return new String[]{};
4355        String[] ret = new String[1000];
4356        int otherCount = (int) (1000 * otherInfluence);
4357        int idx = 0;
4358        if (other.length > 0) {
4359            String[] tmp = new String[other.length];
4360            rng.shuffle(other, tmp);
4361            for (idx = 0; idx < otherCount; idx++) {
4362                ret[idx] = tmp[idx % tmp.length];
4363            }
4364        }
4365        if (me.length > 0) {
4366            String[] tmp = new String[me.length];
4367            rng.shuffle(me, tmp);
4368            for (; idx < 1000; idx++) {
4369                ret[idx] = tmp[idx % tmp.length];
4370            }
4371        } else {
4372            for (; idx < 1000; idx++) {
4373                ret[idx] = other[idx % other.length];
4374            }
4375        }
4376        return ret;
4377    }
4378
4379
4380    protected String[] accentVowels(IRNG rng, String[] me, double influence) {
4381        String[] ret = new String[1000];
4382        int otherCount = (int) (1000 * influence);
4383        int idx;
4384        Matcher matcher;
4385        if (me.length > 0) {
4386            String[] tmp = new String[me.length];
4387            rng.shuffle(me, tmp);
4388            for (idx = 0; idx < otherCount; idx++) {
4389                ret[idx] = tmp[idx % tmp.length]
4390                        .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)])
4391                        .replace('e', accentedVowels[1][rng.nextInt(accentedVowels[1].length)])
4392                        .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)])
4393                        .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)])
4394                        .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)]);
4395                matcher = repeats.matcher(ret[idx]);
4396                if (matcher.find()) {
4397                    ret[idx] = matcher.replaceAll(rng.getRandomElement(me));
4398                }
4399            }
4400            for (; idx < 1000; idx++) {
4401                ret[idx] = tmp[idx % tmp.length];
4402            }
4403        } else
4404            return new String[]{};
4405        return ret;
4406    }
4407
4408    protected String[] accentConsonants(IRNG rng, String[] me, double influence) {
4409        String[] ret = new String[1000];
4410        int otherCount = (int) (1000 * influence);
4411        int idx;
4412        Matcher matcher;
4413        if (me.length > 0) {
4414            String[] tmp = new String[me.length];
4415            rng.shuffle(me, tmp);
4416            for (idx = 0; idx < otherCount; idx++) {
4417                ret[idx] = tmp[idx % tmp.length]
4418                        //0
4419                        .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)])
4420                        .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)])
4421                        .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)])
4422                        .replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)])
4423                        .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)])
4424                        .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)])
4425                        .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)])
4426                        .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)])
4427                        //9
4428                        .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)])
4429                        //11
4430                        //12
4431                        .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)])
4432                        .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)])
4433                        .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)])
4434                        //16
4435                        .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)])
4436                        //18
4437                        .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)])
4438                        .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]);
4439
4440                matcher = repeats.matcher(ret[idx]);
4441                if (matcher.find()) {
4442                    ret[idx] = matcher.replaceAll(rng.getRandomElement(me));
4443                }
4444            }
4445            for (; idx < 1000; idx++) {
4446                ret[idx] = tmp[idx % tmp.length];
4447            }
4448        } else
4449            return new String[]{};
4450        return ret;
4451    }
4452
4453    protected String[] accentBoth(IRNG rng, String[] me, double vowelInfluence, double consonantInfluence) {
4454        String[] ret = new String[1000];
4455        int idx;
4456        Matcher matcher;
4457        if (me.length > 0) {
4458            String[] tmp = new String[me.length];
4459            rng.shuffle(me, tmp);
4460            for (idx = 0; idx < 1000; idx++) {
4461                boolean subVowel = rng.nextDouble() < vowelInfluence, subCon = rng.nextDouble() < consonantInfluence;
4462                if (subVowel && subCon) {
4463                    ret[idx] = tmp[idx % tmp.length]
4464                            .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)])
4465                            .replace('e', accentedVowels[1][rng.nextInt(accentedVowels[1].length)])
4466                            .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)])
4467                            .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)])
4468                            .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)])
4469
4470                            //0
4471                            .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)])
4472                            .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)])
4473                            .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)])
4474                            .replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)])
4475                            .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)])
4476                            .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)])
4477                            .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)])
4478                            .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)])
4479                            //9
4480                            .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)])
4481                            //11
4482                            //12
4483                            .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)])
4484                            .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)])
4485                            .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)])
4486                            //16
4487                            .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)])
4488                            //18
4489                            .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)])
4490                            .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]);
4491
4492                    matcher = repeats.matcher(ret[idx]);
4493                    if (matcher.find()) {
4494                        ret[idx] = matcher.replaceAll(rng.getRandomElement(me));
4495                    }
4496                } else if (subVowel) {
4497                    ret[idx] = tmp[idx % tmp.length]
4498                            .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)])
4499                            .replace('e', accentedVowels[1][rng.nextInt(accentedVowels[1].length)])
4500                            .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)])
4501                            .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)])
4502                            .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)]);
4503
4504                    matcher = repeats.matcher(ret[idx]);
4505                    if (matcher.find()) {
4506                        ret[idx] = matcher.replaceAll(rng.getRandomElement(me));
4507                    }
4508                } else if (subCon) {
4509                    ret[idx] = tmp[idx % tmp.length]
4510                            //0
4511                            .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)])
4512                            .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)])
4513                            .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)])
4514                            .replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)])
4515                            .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)])
4516                            .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)])
4517                            .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)])
4518                            .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)])
4519                            //9
4520                            .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)])
4521                            //11
4522                            //12
4523                            .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)])
4524                            .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)])
4525                            .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)])
4526                            //16
4527                            .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)])
4528                            //18
4529                            .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)])
4530                            .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]);
4531
4532                    matcher = repeats.matcher(ret[idx]);
4533                    if (matcher.find()) {
4534                        ret[idx] = matcher.replaceAll(rng.getRandomElement(me));
4535                    }
4536                } else ret[idx] = tmp[idx % tmp.length];
4537
4538            }
4539        } else
4540            return new String[]{};
4541        return ret;
4542    }
4543
4544    /**
4545     * Makes a new FakeLanguageGen that mixes this object with {@code other}, mingling the consonants and vowels they
4546     * use as well as any word suffixes or other traits, and favoring the qualities in {@code other} by
4547     * {@code otherInfluence}, which will value both languages evenly if it is 0.5 . 
4548     * <br>
4549     * You should generally prefer {@link #mix(double, FakeLanguageGen, double, Object...)} or
4550     * {@link #mixAll(Object...)} if you ever mix 3 or more languages. Chaining this mix() method can be very
4551     * counter-intuitive because the weights are relative, while in the other mix() and mixAll() they are absolute.
4552     * @param other another FakeLanguageGen to mix along with this one into a new language
4553     * @param otherInfluence how much other should affect the pair, with 0.5 being equal and 1.0 being only other used
4554     * @return a new FakeLanguageGen with traits from both languages
4555     */
4556    public FakeLanguageGen mix(FakeLanguageGen other, double otherInfluence) {
4557        otherInfluence = Math.max(0.0, Math.min(otherInfluence, 1.0));
4558        double myInfluence = 1.0 - otherInfluence;
4559
4560        GWTRNG rng = new GWTRNG(hashCode(), other.hashCode() ^ NumberTools.doubleToMixedIntBits(otherInfluence));
4561
4562        String[] ov = merge1000(rng, openingVowels, other.openingVowels, otherInfluence),
4563                mv = merge1000(rng, midVowels, other.midVowels, otherInfluence),
4564                oc = merge1000(rng, openingConsonants, other.openingConsonants, otherInfluence *
4565                        Math.max(0.0, Math.min(1.0, 1.0 - other.vowelStartFrequency + vowelStartFrequency))),
4566                mc = merge1000(rng, midConsonants, other.midConsonants, otherInfluence),
4567                cc = merge1000(rng, closingConsonants, other.closingConsonants, otherInfluence *
4568                        Math.max(0.0, Math.min(1.0, 1.0 - other.vowelEndFrequency + vowelEndFrequency))),
4569                cs = merge1000(rng, closingSyllables, other.closingSyllables, otherInfluence *
4570                        Math.max(0.0, Math.min(1.0, other.syllableEndFrequency - syllableEndFrequency))),
4571                splitters = merge1000(rng, vowelSplitters, other.vowelSplitters, otherInfluence);
4572
4573        double[] fr = new double[Math.max(syllableFrequencies.length, other.syllableFrequencies.length)];
4574        System.arraycopy(syllableFrequencies, 0, fr, 0, syllableFrequencies.length);
4575        for (int i = 0; i < other.syllableFrequencies.length; i++) {
4576            fr[i] += other.syllableFrequencies[i];
4577        }
4578        ArrayList<Modifier> mods = new ArrayList<>(modifiers.size() + other.modifiers.size());
4579        mods.addAll(modifiers);
4580        mods.addAll(other.modifiers);
4581        return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, splitters, fr,
4582                vowelStartFrequency * myInfluence + other.vowelStartFrequency * otherInfluence,
4583                vowelEndFrequency * myInfluence + other.vowelEndFrequency * otherInfluence,
4584                vowelSplitFrequency * myInfluence + other.vowelSplitFrequency * otherInfluence,
4585                syllableEndFrequency * myInfluence + other.syllableEndFrequency * otherInfluence,
4586                (sanityChecks == null) ? other.sanityChecks : sanityChecks, true, mods)
4587                .setName(otherInfluence > 0.5 ? other.name + "/" + name : name + "/" + other.name);
4588    }
4589
4590    private static double readDouble(Object o) {
4591        if (o instanceof Double) return (Double) o;
4592        else if (o instanceof Float) return (Float) o;
4593        else if (o instanceof Long) return ((Long) o).doubleValue();
4594        else if (o instanceof Integer) return (Integer) o;
4595        else if (o instanceof Short) return (Short) o;
4596        else if (o instanceof Byte) return (Byte) o;
4597        else if (o instanceof Character) return (Character) o;
4598        return 0.0;
4599    }
4600
4601    /**
4602     * Produces a FakeLanguageGen by mixing this FakeLanguageGen with one or more other FakeLanguageGen objects. Takes
4603     * a weight for this, another FakeLanguageGen, a weight for that FakeLanguageGen, then a possibly-empty group of
4604     * FakeLanguageGen parameters and the weights for those parameters. If other1 is null or if pairs has been given a
4605     * value of null instead of the normal (possibly empty) array of Objects, then this simply returns a copy of this
4606     * FakeLanguageGen. Otherwise, it will at least mix this language with other1 using the given weights for each.
4607     * If pairs is not empty, it has special requirements for what types it allows and in what order, but does no type
4608     * checking. Specifically, pairs requires the first Object to be a FakeLanguageGen, the next to be a number of some
4609     * kind that will be the weight for the previous FakeLanguageGen(this method can handle non-Double weights, and
4610     * converts them to Double if needed), and every two parameters after that to follow the same order and pattern
4611     * (FakeLanguageGen, then number, then FakeLanguageGen, then number...). Weights are absolute, and don't depend on
4612     * earlier weights, which is the case when chaining the {@link #mix(FakeLanguageGen, double)} method. This makes
4613     * reasoning about the ideal weights for multiple mixed languages easier; to mix 3 languages equally you can use
4614     * 3 equal weights with this, whereas with mix chaining you would need to mix the first two with 0.5 and the third
4615     * with 0.33 .
4616     * <br>
4617     * It's up to you whether you want to use {@link #mixAll(Object...)} or this method; they call the same code and
4618     * produce the same result, including the summary for serialization support. You probably shouldn't use
4619     * {@link #mix(FakeLanguageGen, double)} with two arguments in new code, since it's easy to make mistakes when
4620     * mixing three or more languages (calling that twice or more).
4621     *
4622     * @param myWeight the weight to assign this FakeLanguageGen in the mix
4623     * @param other1   another FakeLanguageGen to mix in; if null, this method will abort and return {@link #copy()}
4624     * @param weight1  the weight to assign other1 in the mix
4625     * @param pairs    may be empty, not null; otherwise must alternate between FakeLanguageGen and number (weight) elements
4626     * @return a FakeLanguageGen produced by mixing this with any FakeLanguageGen arguments by the given weights
4627     */
4628    public FakeLanguageGen mix(double myWeight, FakeLanguageGen other1, double weight1, Object... pairs) {
4629        if (other1 == null || pairs == null)
4630            return copy();
4631        OrderedSet<Modifier> mods = new OrderedSet<>(modifiers);
4632        FakeLanguageGen mixer = removeModifiers();
4633        FakeLanguageGen[] languages = new FakeLanguageGen[2 + (pairs.length >>> 1)];
4634        double[] weights = new double[languages.length];
4635        String[] summaries = new String[languages.length];
4636        boolean summarize = true;
4637        double total = 0.0, current, weight;
4638        languages[0] = mixer;
4639        total += weights[0] = myWeight;
4640        if ((summaries[0] = mixer.summary) == null) summarize = false;
4641        mods.addAll(other1.modifiers);
4642        languages[1] = other1.removeModifiers();
4643        total += weights[1] = weight1;
4644        if (summarize && (summaries[1] = languages[1].summary) == null) summarize = false;
4645        for (int i = 1, p = 2; i < pairs.length; i += 2, p++) {
4646            if (pairs[i] == null || pairs[i - 1] == null)
4647                continue;
4648            languages[p] = ((FakeLanguageGen) pairs[i - 1]).removeModifiers();
4649            total += weights[p] = readDouble(pairs[i]);
4650            if (summarize && (summaries[p] = languages[p].summary) == null) summarize = false;
4651        }
4652        if (total == 0)
4653            return copy();
4654        current = myWeight / total;
4655        for (int i = 1; i < languages.length; i++) {
4656            if ((weight = weights[i]) > 0)
4657                mixer = mixer.mix(languages[i], weight / total / (current += weight / total));
4658        }
4659        if (summarize) {
4660            sb.setLength(0);
4661            String c;
4662            int idx;
4663            for (int i = 0; i < summaries.length; i++) {
4664                c = summaries[i];
4665                idx = c.indexOf('@');
4666                if (idx >= 0) {
4667                    sb.append(c, 0, idx + 1).append(weights[i]);
4668                    if (i < summaries.length - 1)
4669                        sb.append('~');
4670                }
4671            }
4672            for (int i = 0; i < mods.size(); i++) {
4673                sb.append('℗').append(mods.getAt(i).serializeToString());
4674            }
4675            return mixer.addModifiers(mods).summarize(sb.toString());
4676        } else
4677            return mixer.addModifiers(mods);
4678    }
4679
4680    /**
4681     * Produces a FakeLanguageGen from a group of FakeLanguageGen parameters and the weights for those parameters.
4682     * Requires the first Object in pairs to be a FakeLanguageGen, the next to be a number of some kind that will be the
4683     * weight for the previous FakeLanguageGen(this method can handle non-Double weights, and converts them to Double
4684     * if needed), and every two parameters after that to follow the same order and pattern (FakeLanguageGen, then
4685     * number, then FakeLanguageGen, then number...). There should be at least 4 elements in pairs, half of them
4686     * languages and half of them weights, for this to do any mixing, but it can produce a result with as little as one
4687     * FakeLanguageGen (returning a copy of the first FakeLanguageGen). Weights are absolute, and don't depend on
4688     * earlier weights, which is the case when chaining the {@link #mix(FakeLanguageGen, double)} method. This makes
4689     * reasoning about the ideal weights for multiple mixed languages easier; to mix 3 languages equally you can use
4690     * 3 equal weights with this, whereas with mix chaining you would need to mix the first two with 0.5 and the third
4691     * with 0.33 .
4692     * <br>
4693     * This is probably the most intuitive way to mix languages here, though there's also
4694     * {@link #mix(double, FakeLanguageGen, double, Object...)}, which is very similar but doesn't take its parameters
4695     * in quite the same way (it isn't static, and treats the FakeLanguageGen object like the first item in pairs here).
4696     * Used internally in the deserialization code.
4697     *
4698     * @param pairs should have at least one item, and must alternate between FakeLanguageGen and number (weight) elements
4699     * @return a FakeLanguageGen produced by mixing any FakeLanguageGen arguments by the given weights
4700     */
4701    public static FakeLanguageGen mixAll(Object... pairs) {
4702        int len;
4703        if (pairs == null || (len = pairs.length) <= 0)
4704            return ENGLISH.copy();
4705        if (len < 4)
4706            return ((FakeLanguageGen) pairs[0]).copy();
4707        Object[] pairs2 = new Object[len - 4];
4708        if (len > 4)
4709            System.arraycopy(pairs, 4, pairs2, 0, len - 4);
4710        return ((FakeLanguageGen) pairs[0]).mix(readDouble(pairs[1]), (FakeLanguageGen) pairs[2], readDouble(pairs[3]), pairs2);
4711    }
4712
4713    /**
4714     * Produces a new FakeLanguageGen like this one but with extra vowels and/or consonants possible, adding from a wide
4715     * selection of accented vowels (if vowelInfluence is above 0.0) and/or consonants (if consonantInfluence is above
4716     * 0.0). This may produce a gibberish-looking language with no rhyme or reason to the accents, and generally
4717     * consonantInfluence should be very low if it is above 0 at all.
4718     * @param vowelInfluence between 0.0 and 1.0; if 0.0 will not affect vowels at all
4719     * @param consonantInfluence between 0.0 and 1.0; if 0.0 will not affect consonants at all
4720     * @return a new FakeLanguageGen with modifications to add accented vowels and/or consonants
4721     */
4722    public FakeLanguageGen addAccents(double vowelInfluence, double consonantInfluence) {
4723        vowelInfluence = Math.max(0.0, Math.min(vowelInfluence, 1.0));
4724        consonantInfluence = Math.max(0.0, Math.min(consonantInfluence, 1.0));
4725        GWTRNG rng = new GWTRNG(hashCode(),
4726                NumberTools.doubleToMixedIntBits(vowelInfluence)
4727                        ^ NumberTools.doubleToMixedIntBits(consonantInfluence));
4728        String[] ov = accentVowels(rng, openingVowels, vowelInfluence),
4729                mv = accentVowels(rng, midVowels, vowelInfluence),
4730                oc = accentConsonants(rng, openingConsonants, consonantInfluence),
4731                mc = accentConsonants(rng, midConsonants, consonantInfluence),
4732                cc = accentConsonants(rng, closingConsonants, consonantInfluence),
4733                cs = accentBoth(rng, closingSyllables, vowelInfluence, consonantInfluence);
4734
4735
4736        return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, vowelSplitters, syllableFrequencies,
4737                vowelStartFrequency,
4738                vowelEndFrequency,
4739                vowelSplitFrequency,
4740                syllableEndFrequency, sanityChecks, clean, modifiers).setName(name + "-Bònüs");
4741    }
4742
4743    private static String[] copyStrings(String[] start) {
4744        String[] next = new String[start.length];
4745        System.arraycopy(start, 0, next, 0, start.length);
4746        return next;
4747    }
4748
4749    /**
4750     * Useful for cases with limited fonts, this produces a new FakeLanguageGen like this one but with all accented
4751     * characters removed (including almost all non-ASCII Latin-alphabet characters, but only some Greek and Cyrillic
4752     * characters). This will replace letters like "A with a ring" with just "A". Some of the letters chosen as
4753     * replacements aren't exact matches.
4754     * @return a new FakeLanguageGen like this one but without accented letters
4755     */
4756    public FakeLanguageGen removeAccents() {
4757
4758        String[] ov = copyStrings(openingVowels),
4759                mv = copyStrings(midVowels),
4760                oc = copyStrings(openingConsonants),
4761                mc = copyStrings(midConsonants),
4762                cc = copyStrings(closingConsonants),
4763                cs = copyStrings(closingSyllables);
4764        for (int i = 0; i < ov.length; i++) {
4765            ov[i] = removeAccents(openingVowels[i]).toString();
4766        }
4767        for (int i = 0; i < mv.length; i++) {
4768            mv[i] = removeAccents(midVowels[i]).toString();
4769        }
4770        for (int i = 0; i < oc.length; i++) {
4771            oc[i] = removeAccents(openingConsonants[i]).toString();
4772        }
4773        for (int i = 0; i < mc.length; i++) {
4774            mc[i] = removeAccents(midConsonants[i]).toString();
4775        }
4776        for (int i = 0; i < cc.length; i++) {
4777            cc[i] = removeAccents(closingConsonants[i]).toString();
4778        }
4779        for (int i = 0; i < cs.length; i++) {
4780            cs[i] = removeAccents(closingSyllables[i]).toString();
4781        }
4782
4783        return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, vowelSplitters, syllableFrequencies,
4784                vowelStartFrequency,
4785                vowelEndFrequency,
4786                vowelSplitFrequency,
4787                syllableEndFrequency, sanityChecks, clean, modifiers);
4788    }
4789
4790    /**
4791     * Returns the name of this FakeLanguageGen, such as "English" or "Deep Speech", if one was registered for this.
4792     * In the case of hybrid languages produced by {@link #mix(FakeLanguageGen, double)} or related methods, this should
4793     * produce a String like "English/French" (or "English/French/Maori" if more are mixed together). If no name was
4794     * registered, this will return "Nameless Language".
4795     * @return the human-readable name of this language, or "Nameless Language" if none is known
4796     */
4797    public String getName() {
4798        return name;
4799    }
4800    private FakeLanguageGen setName(final String languageName)
4801    {
4802        name = languageName;
4803        return this;
4804    }
4805
4806    /**
4807     * Adds the specified Modifier objects from a Collection to a copy of this FakeLanguageGen and returns it.
4808     * You can obtain a Modifier with the static constants in the FakeLanguageGen.Modifier nested class, the
4809     * FakeLanguageGen.modifier() method, or Modifier's constructor.
4810     *
4811     * @param mods an array or vararg of Modifier objects
4812     * @return a copy of this with the Modifiers added
4813     */
4814    public FakeLanguageGen addModifiers(Collection<Modifier> mods) {
4815        FakeLanguageGen next = copy();
4816        next.modifiers.addAll(mods);
4817        return next;
4818    }
4819
4820    /**
4821     * Adds the specified Modifier objects to a copy of this FakeLanguageGen and returns it.
4822     * You can obtain a Modifier with the static constants in the FakeLanguageGen.Modifier nested class, the
4823     * FakeLanguageGen.modifier() method, or Modifier's constructor.
4824     *
4825     * @param mods an array or vararg of Modifier objects
4826     * @return a copy of this with the Modifiers added
4827     */
4828    public FakeLanguageGen addModifiers(Modifier... mods) {
4829        FakeLanguageGen next = copy();
4830        Collections.addAll(next.modifiers, mods);
4831        return next;
4832    }
4833
4834    /**
4835     * Creates a copy of this FakeLanguageGen with no modifiers.
4836     *
4837     * @return a copy of this FakeLanguageGen with modifiers removed.
4838     */
4839    public FakeLanguageGen removeModifiers() {
4840        FakeLanguageGen next = copy();
4841        next.modifiers.clear();
4842        return next;
4843    }
4844
4845    /**
4846     * Convenience method that just calls {@link Modifier#Modifier(String, String)}.
4847     * @param pattern a String that will be interpreted as a regex pattern using {@link Pattern}
4848     * @param replacement a String that will be interpreted as a replacement string for pattern; can include "$1" and the like if pattern has groups
4849     * @return a Modifier that can be applied to a FakeLanguagGen
4850     */
4851    public static Modifier modifier(String pattern, String replacement) {
4852        return new Modifier(pattern, replacement);
4853    }
4854    /**
4855     * Convenience method that just calls {@link Modifier#Modifier(String, String, double)}.
4856     * @param pattern a String that will be interpreted as a regex pattern using {@link Pattern}
4857     * @param replacement a String that will be interpreted as a replacement string for pattern; can include "$1" and the like if pattern has groups
4858     * @param chance the chance, as a double between 0 and 1, that the Modifier will take effect
4859     * @return a Modifier that can be applied to a FakeLanguagGen
4860     */
4861    public static Modifier modifier(String pattern, String replacement, double chance) {
4862        return new Modifier(pattern, replacement, chance);
4863    }
4864
4865    @Override
4866    public boolean equals(Object o) {
4867        if (this == o) return true;
4868        if (o == null || getClass() != o.getClass()) return false;
4869
4870        FakeLanguageGen that = (FakeLanguageGen) o;
4871
4872        if (clean != that.clean) return false;
4873        if (Double.compare(that.totalSyllableFrequency, totalSyllableFrequency) != 0) return false;
4874        if (Double.compare(that.vowelStartFrequency, vowelStartFrequency) != 0) return false;
4875        if (Double.compare(that.vowelEndFrequency, vowelEndFrequency) != 0) return false;
4876        if (Double.compare(that.vowelSplitFrequency, vowelSplitFrequency) != 0) return false;
4877        if (Double.compare(that.syllableEndFrequency, syllableEndFrequency) != 0) return false;
4878        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4879        if (!Arrays.equals(openingVowels, that.openingVowels)) return false;
4880        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4881        if (!Arrays.equals(midVowels, that.midVowels)) return false;
4882        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4883        if (!Arrays.equals(openingConsonants, that.openingConsonants)) return false;
4884        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4885        if (!Arrays.equals(midConsonants, that.midConsonants)) return false;
4886        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4887        if (!Arrays.equals(closingConsonants, that.closingConsonants)) return false;
4888        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4889        if (!Arrays.equals(vowelSplitters, that.vowelSplitters)) return false;
4890        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4891        if (!Arrays.equals(closingSyllables, that.closingSyllables)) return false;
4892        if (!Arrays.equals(syllableFrequencies, that.syllableFrequencies)) return false;
4893        // Probably incorrect - comparing Object[] arrays with Arrays.equals
4894        if (!Arrays.equals(sanityChecks, that.sanityChecks)) return false;
4895        return modifiers != null ? modifiers.equals(that.modifiers) : that.modifiers == null;
4896    }
4897
4898    @Override
4899    public int hashCode() {
4900        int result = 31 * 31 * 31 * 31 +
4901                31 * 31 * 31 * CrossHash.hash(openingVowels) +
4902                31 * 31 * CrossHash.hash(midVowels) +
4903                31 * CrossHash.hash(openingConsonants) +
4904                CrossHash.hash(midConsonants) | 0;
4905        result = 31 * 31 * 31 * 31 * result +
4906                31 * 31 * 31 * CrossHash.hash(closingConsonants) +
4907                31 * 31 * CrossHash.hash(vowelSplitters) +
4908                31 * CrossHash.hash(closingSyllables) ^
4909                (clean ? 1 : 0);
4910        result = 31 * 31 * 31 * 31 * result +
4911                31 * 31 * 31 * CrossHash.hash(syllableFrequencies) +
4912                31 * 31 * NumberTools.doubleToMixedIntBits(totalSyllableFrequency) +
4913                31 * NumberTools.doubleToMixedIntBits(vowelStartFrequency) +
4914                NumberTools.doubleToMixedIntBits(vowelEndFrequency) | 0;
4915        result = 31 * 31 * 31 * 31 * result +
4916                31 * 31 * 31 * (sanityChecks != null ? sanityChecks.length + 1 : 0) +
4917                31 * 31 * NumberTools.doubleToMixedIntBits(syllableEndFrequency) +                 
4918                31 * NumberTools.doubleToMixedIntBits(vowelSplitFrequency) | 0;
4919        if(modifiers != null) {
4920            for (int i = 0; i < modifiers.size(); i++) {
4921                result = result + 7 * (i + 1) * modifiers.get(i).hashCode() | 0;
4922            }
4923        }
4924        return result;
4925    }
4926    public long hash64() {
4927        long result           = CrossHash.hash64(openingVowels);
4928        result = 31L * result + CrossHash.hash64(midVowels);
4929        result = 31L * result + CrossHash.hash64(openingConsonants);
4930        result = 31L * result + CrossHash.hash64(midConsonants);
4931        result = 31L * result + CrossHash.hash64(closingConsonants);
4932        result = 31L * result + CrossHash.hash64(vowelSplitters);
4933        result = 31L * result + CrossHash.hash64(closingSyllables);
4934        result = 31L * result + CrossHash.hash64(syllableFrequencies);
4935        result = 31L * result + (clean ? 1L : 0L);
4936        result = 31L * result + NumberTools.doubleToLongBits(totalSyllableFrequency);
4937        result = 31L * result + NumberTools.doubleToLongBits(vowelStartFrequency);
4938        result = 31L * result + NumberTools.doubleToLongBits(vowelEndFrequency);
4939        result = 31L * result + NumberTools.doubleToLongBits(vowelSplitFrequency);
4940        result = 31L * result + NumberTools.doubleToLongBits(syllableEndFrequency);
4941        result = 31L * result + (sanityChecks != null ? sanityChecks.length + 1L : 0L);
4942        result *= 31L;
4943        if(modifiers != null) {
4944            for (int i = 0; i < modifiers.size(); i++) {
4945                result += 7L * (i + 1L) * CrossHash.hash64(modifiers.get(i).alterations);
4946            }
4947        }
4948        return result;
4949    }
4950
4951    @Override
4952    public String toString() {
4953        return "FakeLanguageGen{" +
4954                "openingVowels=" + Arrays.toString(openingVowels) +
4955                ", midVowels=" + Arrays.toString(midVowels) +
4956                ", openingConsonants=" + Arrays.toString(openingConsonants) +
4957                ", midConsonants=" + Arrays.toString(midConsonants) +
4958                ", closingConsonants=" + Arrays.toString(closingConsonants) +
4959                ", vowelSplitters=" + Arrays.toString(vowelSplitters) +
4960                ", closingSyllables=" + Arrays.toString(closingSyllables) +
4961                ", clean=" + clean +
4962                ", syllableFrequencies=" + Arrays.toString(syllableFrequencies) +
4963                ", totalSyllableFrequency=" + totalSyllableFrequency +
4964                ", vowelStartFrequency=" + vowelStartFrequency +
4965                ", vowelEndFrequency=" + vowelEndFrequency +
4966                ", vowelSplitFrequency=" + vowelSplitFrequency +
4967                ", syllableEndFrequency=" + syllableEndFrequency +
4968                ", sanityChecks=" + Arrays.toString(sanityChecks) +
4969                ", modifiers=" + modifiers +
4970                '}';
4971    }
4972
4973    public FakeLanguageGen copy() {
4974        return new FakeLanguageGen(openingVowels, midVowels, openingConsonants, midConsonants,
4975                closingConsonants, closingSyllables, vowelSplitters, syllableFrequencies, vowelStartFrequency,
4976                vowelEndFrequency, vowelSplitFrequency, syllableEndFrequency, sanityChecks, clean, modifiers)
4977                .summarize(summary).setName(name);
4978    }
4979
4980
4981    public String serializeToString() {
4982        return (summary == null) ? "" : summary;
4983    }
4984
4985    public static FakeLanguageGen deserializeFromString(String data) {
4986        if (data == null || data.equals(""))
4987            return ENGLISH.copy();
4988        int poundIndex = data.indexOf('#'), snailIndex = data.indexOf('@'), tempBreak = data.indexOf('℗'),
4989                breakIndex = (tempBreak < 0) ? data.length() : tempBreak,
4990                tildeIndex = Math.min(data.indexOf('~'), breakIndex), prevTildeIndex = -1;
4991        if (tildeIndex < 0)
4992            tildeIndex = data.length();
4993
4994        if (snailIndex < 0)
4995            return ENGLISH.copy();
4996        ArrayList<Object> pairs = new ArrayList<>(4);
4997        while (snailIndex >= 0) {
4998            if (poundIndex >= 0 && poundIndex < snailIndex) // random case
4999            {
5000                pairs.add(randomLanguage(Long.parseLong(data.substring(poundIndex + 1, snailIndex))));
5001                pairs.add(Double.valueOf(data.substring(snailIndex + 1, tildeIndex)));
5002                poundIndex = -1;
5003            } else {
5004                pairs.add(registry.getAt(Integer.parseInt(data.substring(prevTildeIndex + 1, snailIndex))));
5005                pairs.add(Double.valueOf(data.substring(snailIndex + 1, tildeIndex)));
5006            }
5007            snailIndex = data.indexOf('@', snailIndex + 1);
5008            if (snailIndex > breakIndex)
5009                break;
5010            prevTildeIndex = tildeIndex;
5011            tildeIndex = Math.min(data.indexOf('~', tildeIndex + 1), breakIndex);
5012            if (tildeIndex < 0)
5013                tildeIndex = data.length();
5014        }
5015        ArrayList<Modifier> mods = new ArrayList<>(8);
5016        if (breakIndex == tempBreak) {
5017            tildeIndex = breakIndex - 1;
5018            while ((prevTildeIndex = data.indexOf('℗', tildeIndex + 1)) >= 0) {
5019                tildeIndex = data.indexOf('℗', prevTildeIndex + 1);
5020                if (tildeIndex < 0) tildeIndex = data.length();
5021                mods.add(Modifier.deserializeFromString(data.substring(prevTildeIndex, tildeIndex)));
5022            }
5023        }
5024        FakeLanguageGen flg = mixAll(pairs.toArray());
5025        flg.modifiers.addAll(mods);
5026        return flg;
5027    }
5028
5029    public static class Modifier implements Serializable {
5030        private static final long serialVersionUID = 1734863678490422371L;
5031        private transient static final StringBuilder modSB = new StringBuilder(32);
5032        public final Alteration[] alterations;
5033
5034        public Modifier() {
5035            alterations = new Alteration[0];
5036        }
5037
5038        public Modifier(String pattern, String replacement) {
5039            alterations = new Alteration[]{new Alteration(pattern, replacement)};
5040        }
5041
5042        public Modifier(String pattern, String replacement, double chance) {
5043            alterations = new Alteration[]{new Alteration(pattern, replacement, chance)};
5044        }
5045
5046        public Modifier(Alteration... alts) {
5047            alterations = (alts == null) ? new Alteration[0] : alts;
5048        }
5049
5050        public StringBuilder modify(IRNG rng, StringBuilder sb) {
5051            Matcher m;
5052            Replacer.StringBuilderBuffer tb;
5053            boolean found;
5054            Alteration alt;
5055            for (int a = 0; a < alterations.length; a++) {
5056                alt = alterations[a];
5057                modSB.setLength(0);
5058                tb = Replacer.wrap(modSB);
5059                m = alt.replacer.getPattern().matcher(sb);
5060
5061                found = false;
5062                while (true) {
5063                    if (alt.chance >= 1 || rng.nextDouble() < alt.chance) {
5064                        if (!Replacer.replaceStep(m, alt.replacer.getSubstitution(), tb))
5065                            break;
5066                        found = true;
5067                    } else {
5068                        if (!m.find())
5069                            break;
5070                        found = true;
5071                        m.getGroup(MatchResult.PREFIX, tb);
5072                        m.getGroup(MatchResult.MATCH, tb);
5073                        m.setTarget(m, MatchResult.SUFFIX);
5074                    }
5075                }
5076                if (found) {
5077                    m.getGroup(MatchResult.TARGET, tb);
5078                    sb.setLength(0);
5079                    sb.append(modSB);
5080                }
5081            }
5082            return sb;
5083        }
5084
5085        /**
5086         * For a character who always pronounces 's', 'ss', and 'sh' as 'th'.
5087         */
5088        public static final Modifier LISP = new Modifier("[tţťț]?[sśŝşšș]+h?", "th");
5089
5090        /**
5091         * For a character who always lengthens 's' and 'z' sounds not starting a word.
5092         */
5093        public static final Modifier HISS = new Modifier("(.)([sśŝşšșzźżž])+", "$1$2$2$2");
5094
5095        /**
5096         * For a character who has a 20% chance to repeat a starting consonant or vowel.
5097         */
5098        public static final Modifier STUTTER = new Modifier(
5099                new Alteration("^([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу]+)", "$1-$1", 0.2),
5100                new Alteration("^([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųαοειυωаеёийъыэюяоу]+)", "$1-$1", 0.2));
5101
5102        /**
5103         * For a language that has a 40% chance to repeat a single Latin vowel (a, e, o, or a variant on one of them
5104         * like å or ö, but not merged letters like æ and œ).
5105         */
5106        public static final Modifier DOUBLE_VOWELS = new Modifier(
5107                "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳ]|^)"
5108                        + "([aàáâãäåāăąǻeèéêëēĕėęěòóôõöøōŏőǿ])"
5109                        + "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳ]|$)", "$1$2$2$3", 0.4);
5110
5111
5112        /**
5113         * For a language that has a 50% chance to repeat a single consonant.
5114         */
5115        public static final Modifier DOUBLE_CONSONANTS = new Modifier("([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу])" +
5116                "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоуqwhjx])" +
5117                "([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу]|$)", "$1$2$2$3", 0.5);
5118
5119        /**
5120         * For a language that never repeats the same letter twice in a row.
5121         */
5122        public static final Modifier NO_DOUBLES = new Modifier("(.)\\1", "$1");
5123
5124        /**
5125         * Removes accented letters and the two non-English consonants from text generated with {@link #NORSE}.
5126         * Replaces á, é, í, ý, ó, æ, ú, and ö with a, e, i, y, o, ae, and ou. In some instances, replaces j
5127         * with y. Replaces ð and þ with th and th, except for when preceded by s (then it replaces sð or sþ
5128         * with st or st) or when the start of a word is fð or fþ, where it replaces with fr or fr.
5129         */
5130        public static final Modifier SIMPLIFY_NORSE = replacementTable(
5131                "á", "a",
5132                "é", "e",
5133                "í", "i",
5134                "ý", "y",
5135                "ó", "o",
5136                "ú", "u",
5137                "æ", "ae",
5138                "ö", "ou",
5139                "([^aeiou])jy", "$1yai",
5140                "([^aeiou])j(?:[aeiouy]+)", "$1yo",
5141                "s([ðþ])", "st",
5142                "\\bf[ðþ]", "fr",
5143                "[ðþ]", "th");
5144
5145        /**
5146         * Simple changes to merge "ae" into "æ", "oe" into "œ", and any of "aé", "áe", or "áé" into "ǽ".
5147         */
5148        public static final Modifier LIGATURES = replacementTable("ae", "æ", "oe", "œ", "áe", "ǽ", "aé", "ǽ", "áé", "ǽ");
5149        /**
5150         * Some changes that can be applied when sanity checks (which force re-generating a new word) aren't appropriate
5151         * for fixing a word that isn't pronounceable.
5152         */
5153        public static final Modifier GENERAL_CLEANUP = replacementTable(
5154                "[æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy]([æǽœýÿŷỳy])", "$1",
5155                "q([ùúûüũūŭůűųu])$", "q$1e",
5156                "([ìíîïĩīĭįıi])[ìíîïĩīĭįıi]", "$1",
5157                "([æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy])[wŵẁẃẅ]$", "$1",
5158                "([ùúûüũūŭůűųu])([òóôõöøōŏőǿo])", "$2$1",
5159                "[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]([æǽœ])", "$1",
5160                "([æǽœ])[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]", "$1",
5161                "([wŵẁẃẅ])[wŵẁẃẅ]", "$1",
5162                "qq", "q");
5163
5164        //àáâãäåāăąǻæǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳy
5165        //bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżž
5166
5167        /**
5168         * Creates a Modifier that will replace the nth char in initial with the nth char in change. Expects initial and
5169         * change to be the same length, but will use the lesser length if they are not equal-length. Because of the
5170         * state of the text at the time modifiers are run, only lower-case letters need to be searched for.
5171         *
5172         * @param initial a String containing lower-case letters or other symbols to be swapped out of a text
5173         * @param change  a String containing characters that will replace occurrences of characters in initial
5174         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5175         */
5176        public static Modifier charReplacementTable(String initial, String change) {
5177            Alteration[] alts = new Alteration[Math.min(initial.length(), change.length())];
5178            for (int i = 0; i < alts.length; i++) {
5179                //literal string syntax; avoids sensitive escaping issues and also doesn't need a character class,
5180                // which is slightly slower and has some odd escaping cases.
5181                alts[i] = new Alteration("\\Q" + initial.charAt(i), change.substring(i, i + 1));
5182            }
5183            return new Modifier(alts);
5184        }
5185
5186        /**
5187         * Creates a Modifier that will replace the nth String key in map with the nth value. Because of the
5188         * state of the text at the time modifiers are run, only lower-case letters need to be searched for.
5189         * This overload of replacementTable allows full regex pattern strings as keys and replacement syntax,
5190         * such as searching for "([aeiou])\\1+" to find repeated occurrences of the same vowel, and "$1" in
5191         * this example to replace the repeated section with only the first vowel.
5192         * The ordering of map matters if a later key contains an earlier key (the earlier one will be replaced
5193         * first, possibly making the later key not match), or if an earlier replacement causes a later one to
5194         * become valid.
5195         *
5196         * @param map containing String keys to replace and String values to use instead; replacements happen in order
5197         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5198         */
5199        public static Modifier replacementTable(OrderedMap<String, String> map) {
5200            if (map == null)
5201                return new Modifier();
5202            Alteration[] alts = new Alteration[map.size()];
5203            for (int i = 0; i < alts.length; i++) {
5204                alts[i] = new Alteration(map.keyAt(i), map.getAt(i));
5205            }
5206            return new Modifier(alts);
5207        }
5208
5209        /**
5210         * Creates a Modifier that will replace the (n*2)th String in pairs with the (n*2+1)th value in pairs. Because
5211         * of the state of the text at the time modifiers are run, only lower-case letters need to be searched for.
5212         * This overload of replacementTable allows full regex syntax for search and replacement Strings,
5213         * such as searching for "([aeiou])\\1+" to find repeated occurrences of the same vowel, and "$1" in
5214         * this example to replace the repeated section with only the first vowel.
5215         * The ordering of pairs matters if a later search contains an earlier search (the earlier one will be replaced
5216         * first, possibly making the later search not match), or if an earlier replacement causes a later one to
5217         * become valid.
5218         *
5219         * @param pairs array or vararg of alternating Strings to search for and Strings to replace with; replacements happen in order
5220         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5221         */
5222        public static Modifier replacementTable(String... pairs) {
5223            int len;
5224            if (pairs == null || (len = pairs.length) <= 1)
5225                return new Modifier();
5226            Alteration[] alts = new Alteration[len >> 1];
5227            for (int i = 0; i < alts.length; i++) {
5228                alts[i] = new Alteration(pairs[i<< 1], pairs[i<<1|1]);
5229            }
5230            return new Modifier(alts);
5231        }
5232
5233        /**
5234         * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the
5235         * language already uses; insertion will replace an existing vowel (at any point in a word that had a vowel
5236         * generated) with a probability of {@code chance}, so chance should be low (0.1 at most) unless you want the
5237         * newly-inserted vowel to be likely to be present in every word of some sentences.
5238         * @param insertion the String to use as an additional vowel
5239         * @param chance the chance for a vowel cluster to be replaced with insertion; normally 0.1 or less
5240         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5241         */
5242        public static Modifier insertVowel(String insertion, double chance)
5243        {
5244            return new Modifier(anyVowelCluster, insertion, chance);
5245        }
5246
5247        /**
5248         * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants
5249         * that the language already uses; insertion will replace an existing consonant (at any point in a word that had
5250         * a consonant generated) with a probability of {@code chance}, so chance should be low (0.1 at most) unless you
5251         * want the newly-inserted consonant to be likely to be present in every word of some sentences.
5252         * @param insertion the String to use as an additional consonant
5253         * @param chance the chance for a consonant cluster to be replaced with insertion; normally 0.1 or less
5254         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5255         */
5256        public static Modifier insertConsonant(String insertion, double chance)
5257        {
5258            return new Modifier(anyConsonantCluster, insertion, chance);
5259        }
5260
5261        /**
5262         * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the
5263         * language already uses; insertion will replace an existing vowel at the start of a word with a probability of
5264         * {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted vowel to be likely
5265         * to start every word of some sentences. Not all languages can start words with vowels, or do that very rarely,
5266         * so this might not do anything.
5267         * @param insertion the String to use as an additional opening vowel
5268         * @param chance the chance for a vowel cluster at the start of a word to be replaced with insertion; normally 0.2 or less
5269         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5270         */
5271        public static Modifier insertOpeningVowel(String insertion, double chance)
5272        {
5273            return new Modifier("\\b[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]+", insertion, chance);
5274        }
5275
5276        /**
5277         * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants
5278         * that the language already uses; insertion will replace an existing consonant at the start of a word with a
5279         * probability of {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted
5280         * consonant to be likely to start every word of some sentences. Not all languages can start words with
5281         * consonants, or do that very rarely, so this might not do anything.
5282         * @param insertion the String to use as an additional opening consonant
5283         * @param chance the chance for a consonant cluster at the start of a word to be replaced with insertion; normally 0.2 or less
5284         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5285         */
5286        public static Modifier insertOpeningConsonant(String insertion, double chance)
5287        {
5288            return new Modifier("\\b[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]+", insertion, chance);
5289        }
5290
5291        /**
5292         * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the
5293         * language already uses; insertion will replace an existing vowel at the end of a word with a probability of
5294         * {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted vowel to be likely
5295         * to end every word of some sentences. Not all languages can end words with vowels, or do that very
5296         * rarely, so this might not do anything.
5297         * @param insertion the String to use as an additional closing vowel
5298         * @param chance the chance for a vowel cluster at the end of a word to be replaced with insertion; normally 0.2 or less
5299         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5300         */
5301        public static Modifier insertClosingVowel(String insertion, double chance)
5302        {
5303            return new Modifier("[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]+\\b", insertion, chance);
5304        }
5305
5306        /**
5307         * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants
5308         * that the language already uses; insertion will replace an existing consonant at the end of a word with a
5309         * probability of {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted
5310         * consonant to be likely to end every word of some sentences. Not all languages can end words with consonants,
5311         * or do that very rarely, so this might not do anything.
5312         * @param insertion the String to use as an additional closing consonant
5313         * @param chance the chance for a consonant cluster at the end of a word to be replaced with insertion; normally 0.2 or less
5314         * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method
5315         */
5316        public static Modifier insertClosingConsonant(String insertion, double chance)
5317        {
5318            return new Modifier("[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]+\\b", insertion, chance);
5319        }
5320
5321        /**
5322         * Replaces any characters this can produce that aren't in ASCII or Latin-1 with Latin-script stand-ins; this
5323         * will often use accented characters, but will only use those present in Latin-1 (which many fonts support).
5324         * <br>
5325         * The rationale for this Modifier is to allow users of FakeLanguageGen who don't display with the wide-ranging
5326         * fonts in the display module to still be able to display something reasonable for generated text.
5327         */
5328        public static final Modifier REDUCE_ACCENTS = replacementTable("ā", "â", "ă", "ä", "ą", "ã", "ǻ", "å", "ǽ", "áe",
5329                "ē", "ê", "ĕ", "ë", "ė", "ë", "ę", "è", "ě", "é", "ĩ", "í", "ī", "î", "į", "ì", "ĭ", "ï", "ı", "iy", "ō", "ô",
5330                "ŏ", "ö", "ő", "ó", "œ", "oe", "ǿ", "ø", "ũ", "ú", "ŭ", "ü", "ů", "ùo", "ű", "ú", "ų", "ù", "ŷ", "ý", "ỳ", "ÿ",
5331                // done with latin vowels...
5332                "ć", "ç", "ĉ", "ç", "ċ", "ç", "č", "ç", "ď", "dh", "đ", "dh", "ĝ", "gh", "ğ", "gh", "ġ", "gh", "ģ", "gh",
5333                "ĥ", "hh", "ħ", "hh", "ĵ", "jh", "ȷ", "jj", "ķ", "kc", "ĺ", "lh", "ļ", "ll", "ľ", "ly", "ŀł", "yl", "ł", "wl",
5334                "ń", "nn", "ņ", "wn", "ň", "nh", "ŋ", "ng", "ŕ", "rh", "ŗ", "wr", "ř", "rr", "ś", "ss", "ŝ", "hs",
5335                "ş", "sy", "š", "ws", "ș", "sw", "ţ", "wt", "ť", "tt", "ț", "ty", "ŵ", "ww", "ẁ", "hw", "ẃ", "wh", "ẅ", "uw",
5336                "ź", "hz", "ż", "zy", "ž", "zz",
5337                // greek
5338                "α", "a", "ο", "o", "ε", "e", "ι", "i", "υ", "y", "ω", "au",
5339                "κρ", "kr", "γγ", "ng", "γκ", "nk", "γξ", "nx", "γχ", "nch", "ρστ", "rst", "ρτ", "rt",
5340                "ρ", "rh", "σ", "s", "ζ", "z", "τ", "t", "κ", "k", "χ", "ch", "ν", "n", "ξ", "x",
5341                "θ", "th", "μ", "m", "π", "p", "ψ", "ps", "β", "b", "λ", "l", "γ", "g", "δ", "d", "φ", "ph", "ς", "s",
5342                // cyrillic
5343                "а", "a", "е", "e", "ё", "ë", "и", "i", "й", "î", "о", "o", "у", "u", "ъ", "ie", "ы", "y", "э", "e", "ю", "iu", "я", "ia",
5344                "б", "b", "в", "v", "г", "g", "д", "d", "к", "k", "л", "l", "п", "p", "р", "r", "с", "s", "т", "t",
5345                "ф", "f", "х", "kh", "ц", "ts", "ч", "ch", "ж", "zh", "м", "m", "н", "n", "з", "z", "ш", "sh", "щ", "shch");
5346
5347        @Override
5348        public boolean equals(Object o) {
5349            if (this == o) return true;
5350            if (o == null || getClass() != o.getClass()) return false;
5351
5352            Modifier modifier = (Modifier) o;
5353
5354            // Probably incorrect - comparing Object[] arrays with Arrays.equals
5355            return Arrays.equals(alterations, modifier.alterations);
5356        }
5357
5358        @Override
5359        public int hashCode() {
5360            return CrossHash.hash(alterations);
5361        }
5362
5363        @Override
5364        public String toString() {
5365            return "Modifier{" +
5366                    "alterations=" + Arrays.toString(alterations) +
5367                    '}';
5368        }
5369
5370        public String serializeToString() {
5371            if (alterations.length == 0) return "\6";
5372            modSB.setLength(0);
5373            modSB.append('\6');
5374            for (int i = 0; i < alterations.length; i++)
5375                modSB.append(alterations[i].serializeToString()).append('\6');
5376            return modSB.toString();
5377        }
5378
5379        public static Modifier deserializeFromString(String data) {
5380            int currIdx = data.indexOf(6), altIdx = currIdx, matches = 0;
5381            while (currIdx >= 0) {
5382                if ((currIdx = data.indexOf(6, currIdx + 1)) < 0)
5383                    break;
5384                matches++;
5385            }
5386            Alteration[] alts = new Alteration[matches];
5387            for (int i = 0; i < matches; i++) {
5388                alts[i] = Alteration.deserializeFromString(data.substring(altIdx + 1, altIdx = data.indexOf(6, altIdx + 1)));
5389            }
5390            return new Modifier(alts);
5391        }
5392    }
5393
5394    public static class Alteration implements Serializable {
5395        private static final long serialVersionUID = -2138854697837563188L;
5396        public Replacer replacer;
5397        public String replacement;
5398        public double chance;
5399
5400        public Alteration() {
5401            this("[tţťț]?[sśŝşšș]+h?", "th");
5402        }
5403
5404        public Alteration(String pattern, String replacement) {
5405            this.replacement = replacement;
5406            replacer = Pattern.compile(pattern).replacer(replacement);
5407            chance = 1.0;
5408        }
5409
5410        public Alteration(String pattern, String replacement, double chance) {
5411            this.replacement = replacement;
5412            replacer = Pattern.compile(pattern).replacer(replacement);
5413            this.chance = chance;
5414        }
5415
5416        public Alteration(Pattern pattern, String replacement, double chance) {
5417            this.replacement = replacement;
5418            replacer = pattern.replacer(replacement);
5419            this.chance = chance;
5420        }
5421
5422        @Override
5423        public boolean equals(Object o) {
5424            if (this == o) return true;
5425            if (o == null || getClass() != o.getClass()) return false;
5426
5427            Alteration that = (Alteration) o;
5428
5429            if (Double.compare(that.chance, chance) != 0) return false;
5430            return replacer.equals(that.replacer);
5431
5432        }
5433
5434        @Override
5435        public int hashCode() {
5436            long result;
5437            result = CrossHash.hash64(replacer.getPattern().serializeToString());
5438            result = 31L * result + NumberTools.doubleToLongBits(chance);
5439            result ^= result >>> 32;
5440            return (int) (0xFFFFFFFFL & result);
5441        }
5442
5443        @Override
5444        public String toString() {
5445            return "Alteration{" +
5446                    "replacer=" + replacer +
5447                    ", chance=" + chance +
5448                    '}';
5449        }
5450
5451        public String serializeToString() {
5452            return replacer.getPattern().serializeToString() + '\2' + replacement + '\4' + chance;
5453        }
5454
5455        public static Alteration deserializeFromString(String data) {
5456            int split2 = data.indexOf('\2'), split4 = data.indexOf('\4');
5457            return new Alteration(Pattern.deserializeFromString(data.substring(0, split2)),
5458                    data.substring(split2 + 1, split4),
5459                    Double.parseDouble(data.substring(split4 + 1)));
5460        }
5461    }
5462
5463    /**
5464     * A simple way to bundle a FakeLanguageGen with the arguments that would be passed to it when calling
5465     * {@link FakeLanguageGen#sentence(IRNG, int, int, String[], String[], double, int)} or one of its overloads.
5466     * You can call {@link #sentence()} on this to produce another String sentence with the parameters it was given
5467     * at construction. The parameters to
5468     * {@link #SentenceForm(FakeLanguageGen, IStatefulRNG, int, int, String[], String[], double, int)} are stored in fields of
5469     * the same name, and all fields in this class are public and modifiable.
5470     */
5471    public static class SentenceForm implements Serializable
5472    {
5473        private static final long serialVersionUID = 1246527948419533147L;
5474        public IStatefulRNG rng;
5475        public int minWords, maxWords, maxChars;
5476        public String[] midPunctuation, endPunctuation;
5477        public double midPunctuationFrequency;
5478        public FakeLanguageGen language;
5479
5480        /**
5481         * Builds a SentenceForm with all default fields, using {@link FakeLanguageGen#FANTASY_NAME} for a language,
5482         * using between 1 and 9 words in a sentence, and otherwise defaulting to how
5483         * {@link #SentenceForm(FakeLanguageGen, int, int)} behaves.
5484         */
5485        public SentenceForm()
5486        {
5487            this(FakeLanguageGen.FANTASY_NAME, FakeLanguageGen.srng, 1, 9,
5488                    mid,
5489                    end, 0.18, -1);
5490        }
5491        /**
5492         * Builds a SentenceForm with only a few fields specified. The {@link #rng} will be made based on
5493         * FakeLanguageGen's static {@link FakeLanguageGen#srng} field, maxChars will be -1 so the sentence length
5494         * will be limited only by maxWords and the length of words produced, and the between-word and end-of-sentence
5495         * punctuation will be set to reasonable defaults. This places either a comma or a semicolon after a word in the
5496         * middle of a sentence about 18% of the time ({@code midPunctuationFrequency} is 0.18), and can end a sentence
5497         * in a period, exclamation mark, question mark, or ellipsis (the "..." punctuation).
5498         * @param language A FakeLanguageGen to use to generate words
5499         * @param minWords minimum words per sentence
5500         * @param maxWords maximum words per sentence
5501         */
5502        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords)
5503        {
5504            this(language, FakeLanguageGen.srng, minWords, maxWords, mid,
5505                    end, 0.18, -1);
5506        }
5507        /**
5508         * Builds a SentenceForm with all fields specified except for {@link #rng}, which will be made based on
5509         * FakeLanguageGen's static {@link FakeLanguageGen#srng} field, and maxChars, which means the sentence length
5510         * will be limited only by maxWords and the length of words produced.
5511         * @param language A FakeLanguageGen to use to generate words
5512         * @param minWords minimum words per sentence
5513         * @param maxWords maximum words per sentence
5514         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
5515         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
5516         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
5517         */
5518        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords, String[] midPunctuation,
5519                            String[] endPunctuation, double midPunctuationFrequency)
5520        {
5521            this(language, FakeLanguageGen.srng, minWords, maxWords, midPunctuation, endPunctuation,
5522                    midPunctuationFrequency, -1);
5523        }
5524        /**
5525         * Builds a SentenceForm with all fields specified except for {@link #rng}, which will be made based on
5526         * FakeLanguageGen's static {@link FakeLanguageGen#srng} field.
5527         * @param language A FakeLanguageGen to use to generate words
5528         * @param minWords minimum words per sentence
5529         * @param maxWords maximum words per sentence
5530         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
5531         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
5532         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
5533         * @param maxChars the maximum number of chars to use in a sentence, or -1 for no hard limit
5534         */
5535        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords, String[] midPunctuation,
5536                            String[] endPunctuation, double midPunctuationFrequency, int maxChars)
5537        {
5538            this(language, FakeLanguageGen.srng, minWords, maxWords, midPunctuation, endPunctuation,
5539                    midPunctuationFrequency, maxChars);
5540        }
5541
5542        /**
5543         * Builds a SentenceForm with all fields specified; each value is referenced directly except for {@code rng},
5544         * which will not change or be directly referenced (a new GWTRNG will be used with the same state value).
5545         * @param language A FakeLanguageGen to use to generate words
5546         * @param rng a StatefulRNG that will not be directly referenced; the state will be copied into a new StatefulRNG
5547         * @param minWords minimum words per sentence
5548         * @param maxWords maximum words per sentence
5549         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
5550         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
5551         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
5552         * @param maxChars the maximum number of chars to use in a sentence, or -1 for no hard limit
5553         */
5554        public SentenceForm(FakeLanguageGen language, IStatefulRNG rng, int minWords, int maxWords,
5555                            String[] midPunctuation, String[] endPunctuation,
5556                            double midPunctuationFrequency, int maxChars)
5557        {
5558            this.language = language;
5559            this.rng = new GWTRNG(rng.getState());
5560            this.minWords = minWords;
5561            this.maxWords = maxWords;
5562            this.midPunctuation = midPunctuation;
5563            this.endPunctuation = endPunctuation;
5564            this.midPunctuationFrequency = midPunctuationFrequency;
5565            this.maxChars = maxChars;
5566        }
5567        public String sentence()
5568        {
5569            return language.sentence(rng, minWords, maxWords, midPunctuation, endPunctuation,
5570                    midPunctuationFrequency, maxChars);
5571        }
5572
5573        public String serializeToString() {
5574            return language.serializeToString() + '℘' +
5575                    rng.getState() + '℘' +
5576                    minWords + '℘' +
5577                    maxWords + '℘' +
5578                    StringKit.join("ℙ", midPunctuation) + '℘' +
5579                    StringKit.join("ℙ", endPunctuation) + '℘' +
5580                    NumberTools.doubleToLongBits(midPunctuationFrequency) + '℘' +
5581                    maxChars;
5582        }
5583        public static SentenceForm deserializeFromString(String ser)
5584        {
5585            int gap = ser.indexOf('℘');
5586            FakeLanguageGen lang = FakeLanguageGen.deserializeFromString(ser.substring(0, gap));
5587            GWTRNG rng = new GWTRNG(
5588                    StringKit.longFromDec(ser,gap + 1, gap = ser.indexOf('℘', gap + 1)));
5589            int minWords = StringKit.intFromDec(ser,gap + 1, gap = ser.indexOf('℘', gap + 1));
5590            int maxWords = StringKit.intFromDec(ser,gap + 1, gap = ser.indexOf('℘', gap + 1));
5591            String[] midPunctuation =
5592                    StringKit.split(ser.substring(gap + 1, gap = ser.indexOf('℘', gap + 1)), "ℙ");
5593            String[] endPunctuation =
5594                    StringKit.split(ser.substring(gap + 1, gap = ser.indexOf('℘', gap + 1)), "ℙ");
5595            double midFreq = NumberTools.longBitsToDouble(StringKit.longFromDec(ser,gap + 1, gap = ser.indexOf('℘', gap + 1)));
5596            int maxChars = StringKit.intFromDec(ser,gap + 1, ser.length());
5597            return new SentenceForm(lang, rng, minWords, maxWords, midPunctuation, endPunctuation, midFreq, maxChars);
5598        }
5599    }
5600}