001package squidpony; 002 003import regexodus.MatchResult; 004import regexodus.Matcher; 005import regexodus.Pattern; 006import regexodus.REFlags; 007import regexodus.Replacer; 008import squidpony.squidmath.*; 009 010import java.io.Serializable; 011import java.util.ArrayList; 012import java.util.Arrays; 013import java.util.Collection; 014import java.util.Collections; 015import java.util.List; 016import java.util.Map; 017import java.util.Set; 018 019/** 020 * A text generator for producing sentences and/or words in nonsense languages that fit a theme. This does not use an 021 * existing word list as a basis for its output, so it may or may not produce existing words occasionally, but you can 022 * safely assume it won't generate a meaningful sentence except in the absolute unlikeliest of cases. 023 * <br> 024 * This supports a lot of language styles in predefined constants. There's a registry of these constants in 025 * {@link #registered} and their names in {@link #registeredNames}, plus the languages that would make sense for 026 * real-world cultures to use (and all use the Latin alphabet, so they can be swapped around) are in 027 * {@link #romanizedHumanLanguages}. You can make a new language with a constructor, but it's pretty time-consuming; the 028 * recommended ways are generating a random language with {@link #randomLanguage(long)} (when you don't care too much 029 * about exactly how it should sound), or blending two or more languages with {@link #mixAll(Object...)} or 030 * {@link #mix(double, FakeLanguageGen, double, Object...)} (when you have a sound in mind that isn't quite met by an 031 * existing language). 032 * <br> 033 * Created by Tommy Ettinger on 11/29/2015. 034 * @see NaturalLanguageCipher NaturalLanguageCipher uses a FakeLanguageGen to reversibly translate English text to nonsense. 035 * @see Thesaurus Thesaurus uses this class a lot to generate things like plant names and the titles of nations. 
036 */ 037public class FakeLanguageGen implements Serializable { 038 private static final long serialVersionUID = -2396642435461186352L; 039 public final String[] openingVowels, midVowels, openingConsonants, midConsonants, closingConsonants, 040 vowelSplitters, closingSyllables; 041 public final boolean clean; 042 public final double[] syllableFrequencies; 043 protected double totalSyllableFrequency; 044 public final double vowelStartFrequency, vowelEndFrequency, vowelSplitFrequency, syllableEndFrequency; 045 public final Pattern[] sanityChecks; 046 public ArrayList<Modifier> modifiers; 047 public static final GWTRNG srng = new GWTRNG(); 048 private static final OrderedMap<String, FakeLanguageGen> registry = new OrderedMap<>(64, Hashers.caseInsensitiveStringHasher); 049 protected String summary; 050 protected String name = "Nameless Language"; 051 private static final transient StringBuilder sb = new StringBuilder(20); 052 private static final transient StringBuilder ender = new StringBuilder(12); 053 private static final transient StringBuilder ssb = new StringBuilder(80); 054 /** 055 * A pattern String that will match any vowel FakeLanguageGen can produce out-of-the-box, including Latin, Greek, 056 * and Cyrillic; for use when a String will be interpreted as a regex (as in {@link FakeLanguageGen.Alteration}). 057 */ 058 public static final String anyVowel = "[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]", 059 /** 060 * A pattern String that will match one or more of any vowels FakeLanguageGen can produce out-of-the-box, including 061 * Latin, Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in 062 * {@link FakeLanguageGen.Alteration}). 
063 */ 064 anyVowelCluster = anyVowel + '+', 065 /** 066 * A pattern String that will match any consonant FakeLanguageGen can produce out-of-the-box, including Latin, 067 * Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in 068 * {@link FakeLanguageGen.Alteration}). 069 */ 070 anyConsonant = "[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]", 071 /** 072 * A pattern String that will match one or more of any consonants FakeLanguageGen can produce out-of-the-box, 073 * including Latin, Greek, and Cyrillic; for use when a String will be interpreted as a regex (as in 074 * {@link FakeLanguageGen.Alteration}). 075 */ 076 anyConsonantCluster = anyConsonant + '+'; 077 protected static final Pattern repeats = Pattern.compile("(.)\\1+"), 078 vowelClusters = Pattern.compile(anyVowelCluster, REFlags.IGNORE_CASE | REFlags.UNICODE), 079 consonantClusters = Pattern.compile(anyConsonantCluster, REFlags.IGNORE_CASE | REFlags.UNICODE); 080 //latin 081 //àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳybcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżž 082 //ÀÁÂÃÄÅÆĀĂĄǺǼAÈÉÊËĒĔĖĘĚEÌÍÎÏĨĪĬĮIIÒÓÔÕÖØŌŎŐŒǾOÙÚÛÜŨŪŬŮŰŲUÝŸŶỲYBCÇĆĈĊČDÞÐĎĐFGĜĞĠĢHĤĦJĴȷKĶLĹĻĽĿŁMNÑŃŅŇŊPQRŔŖŘSŚŜŞŠȘTŢŤȚVWŴẀẂẄXYÝŸŶỲZŹŻŽṚṜḶḸḌṬṄṆṢṂḤ 083 //greek 084 //αοειυρσζτκχνθμπψβλγφξς 085 //ΑΟΕΙΥΡΣΖΤΚΧΝΘΜΠΨΒΛΓΦΞ 086 //cyrillic 087 //аеёийоуъыэюябвгдклпрстфхцжмнзчшщ 088 //АЕЁИЙОУЪЫЭЮЯБВГДКЛПРСТФХЦЖМНЗЧШЩ 089 090 private static final Pattern[] 091 vulgarChecks = new Pattern[] 092 { 093 //17 is REFlags.UNICODE | REFlags.IGNORE_CASE 094 Pattern.compile("[sξζzkкκcсς][hнlι].{1,3}[dtтτΓг]", 17), 095 Pattern.compile("(?:(?:[pрρ][hн])|[fd]).{1,3}[kкκcсςxхжχq]", 17), // lots of these end in a 'k' sound, huh 096 Pattern.compile("[kкκcсςСQq][uμυνvhн]{1,3}[kкκcсςxхжχqmм]", 17), 097 Pattern.compile("[bъыбвβЪЫБ].?[iτιyуλγУ].?[cсς]", 17), 098 Pattern.compile("[hн][^aаαΛeезξεЗΣiτιyуλγУ][^aаαΛeезξεЗΣiτιyуλγУ]?[rяΓ]", 17), 
099 Pattern.compile("[tтτΓгcсς][iτιyуλγУ][tтτΓг]+$", 17), 100 Pattern.compile("(?:(?:[pрρ][hн])|f)[aаαΛhн]{1,}[rяΓ][tтτΓг]", 17), 101 Pattern.compile("[Ssξζzcсς][hн][iτιyуλγУ].?[sξζzcсς]", 17), 102 Pattern.compile("[aаαΛ][nи][aаαΛeезξεЗΣiτιyуλγУoоюσοuμυνv]{1,2}[Ssξlιζz]", 17), 103 Pattern.compile("[aаαΛ]([sξζz]{2})", 17), 104 Pattern.compile("[kкκcсςСQq][hн]?[uμυνv]([hн]?)[nи]+[tтτΓг]", 17), 105 Pattern.compile("[nиfvν]..?[jg]", 17), // might as well remove two possible slurs and a body part with one check 106 Pattern.compile("[pрρ](?:(?:([eезξεЗΣoоюσοuμυνv])\\1)|(?:[eезξεЗΣiτιyуλγУuμυνv]+[sξζz]))", 17), // the grab bag of juvenile words 107 Pattern.compile("[mм][hнwψшщ]?..?[rяΓ].?d", 17), // should pick up the #1 obscenity from Spanish and French 108 Pattern.compile("[g][hн]?[aаαАΑΛeеёзξεЕЁЗΕΣ][yуλγУeеёзξεЕЁЗΕΣ]", 17), // could be inappropriate for random text 109 Pattern.compile("[wψшщuμυνv](?:[hн]?)[aаαΛeеёзξεЗΕΣoоюσοuμυνv](?:[nи]+)[gkкκcсςxхжχq]", 17) 110 }, 111 genericSanityChecks = new Pattern[] 112 { 113 Pattern.compile("[AEIOUaeiou]{3}"), 114 Pattern.compile("(\\p{L})\\1\\1"), 115 Pattern.compile("[Ii][iyq]"), 116 Pattern.compile("[Yy]([aiu])\\1"), 117 Pattern.compile("[Rr][uy]+[rh]"), 118 Pattern.compile("[Qq]u[yu]"), 119 Pattern.compile("[^oaei]uch"), 120 Pattern.compile("[Hh][tcszi]?h"), 121 Pattern.compile("[Tt]t[^aeiouy]{2}"), 122 Pattern.compile("[Yy]h([^aeiouy]|$)"), 123 Pattern.compile("([xqy])\\1$"), 124 Pattern.compile("[qi]y$"), 125 Pattern.compile("[szSZrlRL]+?[^aeiouytdfgkcpbmnslrv][rlsz]"), 126 Pattern.compile("[UIuiYy][wy]"), 127 Pattern.compile("^[UIui]e"), 128 Pattern.compile("^([^aeioyl])\\1", 17) 129 }, 130 englishSanityChecks = new Pattern[] 131 { 132 Pattern.compile("[AEIOUaeiou]{3}"), 133 Pattern.compile("(\\w)\\1\\1"), 134 Pattern.compile("(.)\\1(.)\\2"), 135 Pattern.compile("[Aa][ae]"), 136 Pattern.compile("[Uu][umlkj]"), 137 Pattern.compile("[Ii][iyqkhrl]"), 138 Pattern.compile("[Oo][c]"), 139 Pattern.compile("[Yy]([aiu])\\1"), 140 
Pattern.compile("[Rr][aeiouy]+[rh]"), 141 Pattern.compile("[Qq]u[yu]"), 142 Pattern.compile("[^oaei]uch"), 143 Pattern.compile("[Hh][tcszi]?h"), 144 Pattern.compile("[Tt]t[^aeiouy]{2}"), 145 Pattern.compile("[Yy]h([^aeiouy]|$)"), 146 Pattern.compile("[szSZrlRL]+?[^aeiouytdfgkcpbmnslr][rlsz]"), 147 Pattern.compile("[UIuiYy][wy]"), 148 Pattern.compile("^[UIui][ae]"), 149 Pattern.compile("q(?:u?)$") 150 }, 151 japaneseSanityChecks = new Pattern[] 152 { 153 Pattern.compile("[AEIOUaeiou]{3}"), 154 Pattern.compile("(\\w)\\1\\1"), 155 Pattern.compile("[Tt]s[^u]"), 156 Pattern.compile("[Ff][^u]"), 157 Pattern.compile("[Yy][^auo]"), 158 Pattern.compile("[Tt][ui]"), 159 Pattern.compile("[SsZzDd]i"), 160 Pattern.compile("[Hh]u"), 161 }, 162 arabicSanityChecks = new Pattern[] 163 { 164 Pattern.compile("(.)\\1\\1"), 165 Pattern.compile("-[^aeiou](?:[^aeiou]|$)"), 166 }; 167 private static final Replacer[] 168 accentFinders = new Replacer[] 169 { 170 Pattern.compile("[àáâäăāãåąǻ]").replacer("a"), 171 Pattern.compile("[èéêëĕēėęě]").replacer("e"), 172 Pattern.compile("[ìíîïĭīĩįı]").replacer("i"), 173 Pattern.compile("[òóôöŏōõøőǿ]").replacer("o"), 174 Pattern.compile("[ùúûüŭūũůűų]").replacer("u"), 175 Pattern.compile("[æǽ]").replacer("ae"), 176 Pattern.compile("œ").replacer("oe"), 177 Pattern.compile("[ÀÁÂÃÄÅĀĂĄǺ]").replacer("A"), 178 Pattern.compile("[ÈÉÊËĒĔĖĘĚ]").replacer("E"), 179 Pattern.compile("[ÌÍÎÏĨĪĬĮI]").replacer("I"), 180 Pattern.compile("[ÒÓÔÕÖØŌŎŐǾ]").replacer("O"), 181 Pattern.compile("[ÙÚÛÜŨŪŬŮŰŲ]").replacer("U"), 182 Pattern.compile("[ÆǼ]").replacer("Ae"), 183 Pattern.compile("Œ").replacer("Oe"), 184 Pattern.compile("Ё").replacer("Е"), 185 Pattern.compile("Й").replacer("И"), 186 Pattern.compile("[çćĉċč]").replacer("c"), 187 Pattern.compile("[þðďđ]").replacer("d"), 188 Pattern.compile("[ĝğġģ]").replacer("g"), 189 Pattern.compile("[ĥħ]").replacer("h"), 190 Pattern.compile("[ĵȷ]").replacer("j"), 191 Pattern.compile("ķ").replacer("k"), 192 
Pattern.compile("[ĺļľŀłļ]").replacer("l"), 193 Pattern.compile("[ñńņňŋ]").replacer("n"), 194 Pattern.compile("[ŕŗřŗŕ]").replacer("r"), 195 Pattern.compile("[śŝşšș]").replacer("s"), 196 Pattern.compile("[ţťŧț]").replacer("t"), 197 Pattern.compile("[ŵẁẃẅ]").replacer("w"), 198 Pattern.compile("[ýÿŷỳ]").replacer("y"), 199 Pattern.compile("[źżž]").replacer("z"), 200 Pattern.compile("[ÇĆĈĊČ]").replacer("C"), 201 Pattern.compile("[ÞÐĎĐḌ]").replacer("D"), 202 Pattern.compile("[ĜĞĠĢ]").replacer("G"), 203 Pattern.compile("[ĤĦḤ]").replacer("H"), 204 Pattern.compile("Ĵ").replacer("J"), 205 Pattern.compile("Ķ").replacer("K"), 206 Pattern.compile("[ĹĻĽĿŁḶḸĻ]").replacer("L"), 207 Pattern.compile("Ṃ").replacer("M"), 208 Pattern.compile("[ÑŃŅŇŊṄṆ]").replacer("N"), 209 Pattern.compile("[ŔŖŘṚṜŖŔ]").replacer("R"), 210 Pattern.compile("[ŚŜŞŠȘṢ]").replacer("S"), 211 Pattern.compile("[ŢŤŦȚṬ]").replacer("T"), 212 Pattern.compile("[ŴẀẂẄ]").replacer("W"), 213 Pattern.compile("[ÝŸŶỲ]").replacer("Y"), 214 Pattern.compile("[ŹŻŽ]").replacer("Z"), 215 Pattern.compile("ё").replacer("е"), 216 Pattern.compile("й").replacer("и"), 217 }; 218 219 static final char[][] accentedVowels = new char[][]{ 220 new char[]{'a', 'à', 'á', 'â', 'ä', 'ā', 'ă', 'ã', 'å', 'ą', 'ǻ'}, 221 new char[]{'e', 'è', 'é', 'ê', 'ë', 'ē', 'ĕ', 'ė', 'ę', 'ě'}, 222 new char[]{'i', 'ì', 'í', 'î', 'ï', 'ī', 'ĭ', 'ĩ', 'į', 'ı',}, 223 new char[]{'o', 'ò', 'ó', 'ô', 'ö', 'ō', 'ŏ', 'õ', 'ø', 'ő', 'ǿ'}, 224 new char[]{'u', 'ù', 'ú', 'û', 'ü', 'ū', 'ŭ', 'ũ', 'ů', 'ű', 'ų'} 225 }, 226 accentedConsonants = new char[][] 227 { 228 new char[]{ 229 'b' 230 }, 231 new char[]{ 232 'c', 'ç', 'ć', 'ĉ', 'ċ', 'č', 233 }, 234 new char[]{ 235 'd', 'þ', 'ð', 'ď', 'đ', 236 }, 237 new char[]{ 238 'f' 239 }, 240 new char[]{ 241 'g', 'ĝ', 'ğ', 'ġ', 'ģ', 242 }, 243 new char[]{ 244 'h', 'ĥ', 'ħ', 245 }, 246 new char[]{ 247 'j', 'ĵ', 'ȷ', 248 }, 249 new char[]{ 250 'k', 'ķ', 251 }, 252 new char[]{ 253 'l', 'ĺ', 'ļ', 'ľ', 'ŀ', 'ł', 254 }, 255 new char[]{ 256 
'm', 257 }, 258 new char[]{ 259 'n', 'ñ', 'ń', 'ņ', 'ň', 'ŋ', 260 }, 261 new char[]{ 262 'p', 263 }, 264 new char[]{ 265 'q', 266 }, 267 new char[]{ 268 'r', 'ŕ', 'ŗ', 'ř', 269 }, 270 new char[]{ 271 's', 'ś', 'ŝ', 'ş', 'š', 'ș', 272 }, 273 new char[]{ 274 't', 'ţ', 'ť', 'ț', 275 }, 276 new char[]{ 277 'v', 278 }, 279 new char[]{ 280 'w', 'ŵ', 'ẁ', 'ẃ', 'ẅ', 281 }, 282 new char[]{ 283 'x', 284 }, 285 new char[]{ 286 'y', 'ý', 'ÿ', 'ŷ', 'ỳ', 287 }, 288 new char[]{ 289 'z', 'ź', 'ż', 'ž', 290 }, 291 }; 292 private static final OrderedMap<String, String> openVowels, 293 openCons, midCons, closeCons; 294 295 static { 296 297 registry.put("", null); 298 299 openVowels = Maker.makeOM( 300 "a", "a aa ae ai au ea ia oa ua", 301 "e", "e ae ea ee ei eo eu ie ue", 302 "i", "i ai ei ia ie io iu oi ui", 303 "o", "o eo io oa oi oo ou", 304 "u", "u au eu iu ou ua ue ui"); 305 306 openCons = Maker.makeOM( 307 "b", "b bl br by bw bh", 308 "bh", "bh", 309 "c", "c cl cr cz cth sc scl", 310 "ch", "ch ch chw", 311 "d", "d dr dz dy dw dh", 312 "dh", "dh", 313 "f", "f fl fr fy fw sf", 314 "g", "g gl gr gw gy gn", 315 "h", "bh cth ch ch chw dh h hm hy hw kh khl khw ph phl phr sh shl shqu shk shp shm shn shr shw shpl th th thr thl thw", 316 "j", "j j", 317 "k", "k kr kl ky kn sk skl shk", 318 "kh", "kh khl khw", 319 "l", "bl cl fl gl kl khl l pl phl scl skl spl sl shl shpl tl thl vl zl", 320 "m", "hm m mr mw my sm smr shm", 321 "n", "gn kn n nw ny pn sn shn", 322 "p", "p pl pr py pw pn sp spr spl shp shpl ph phl phr", 323 "ph", "ph phl phr", 324 "q", "q", 325 "qu", "qu squ shqu", 326 "r", "br cr dr fr gr kr mr pr phr r str spr smr shr tr thr vr wr zvr", 327 "s", "s sc scl sf sk skl st str sp spr spl sl sm smr sn sw sy squ ts sh shl shqu shk shp shm shn shr shw shpl", 328 "sh", "sh shl shqu shk shp shm shn shr shw shpl", 329 "t", "st str t ts tr tl ty tw tl", 330 "th", "cth th thr thl thw", 331 "tl", "tl", 332 "v", "v vr vy zv zvr vl", 333 "w", "bw chw dw fw gw hw khw mw nw pw sw shw tw thw 
w wr zw", 334 "x", "x", 335 "y", "by dy fy gy hy ky my ny py sy ty vy y zy", 336 "z", "cz dz z zv zvr zl zy zw"); 337 338 midCons = Maker.makeOM( 339 "b", "lb rb bj bl br lbr rbl skbr scbr zb bq bdh dbh bbh lbh rbh bb", 340 "bh", "bbh dbh lbh rbh", 341 "c", "lc lsc rc rsc cl cqu cr ct lcr rcl sctr scdr scbr scpr msc mscr nsc nscr ngscr ndscr cc", 342 "ch", "lch rch rch", 343 "d", "ld ld rd rd skdr scdr dr dr dr rdr ldr zd zdr ndr ndscr ndskr ndst dq ldh rdh dbh bdh ddh dd", 344 "dh", "bdh ddh ldh rdh", 345 "f", "lf rf fl fr fl fr fl fr lfr rfl ft ff", 346 "g", "lg lg rg rg gl gr gl gr gl gr lgr rgl zg zgr ngr ngl ngscr ngskr gq gg", 347 "h", "lch lph lth lsh rch rph rsh rth phl phr lphr rphl shl shr lshr rshl msh mshr zth bbh dbh lbh rbh bdh ddh ldh rdh", 348 "j", "bj lj rj", 349 "k", "lk lsk rk rsk kl kr lkr rkl sktr skdr skbr skpr tk zk zkr msk mskr nsk nskr ngskr ndskr kq kk", 350 "kh", "lkh rkh", 351 "l", "lb lc lch ld lf lg lj lk lm ln lp lph ls lst lt lth lsc lsk lsp lv lz lsh bl lbr rbl cl lcr rcl fl lfr rfl gl lgr rgl kl lkr rkl pl lpr rpl phl lphr rphl shl lshr rshl sl rsl lsl ldr ltr lx ngl nsl msl nsl ll lth tl ltl rtl vl", 352 "m", "lm rm zm msl msc mscr msh mshr mst msp msk mskr mm", 353 "n", "ln rn nx zn zn ndr nj ntr ntr ngr ngl nsl nsl nsc nscr ngscr ndscr nsk nskr ngskr ndskr nst ndst nsp nn", 354 "p", "lp lsp rp rsp pl pr lpr rpl skpr scpr zp msp nsp lph rph phl phr lphr rphl pq pp", 355 "ph", "lph lph rph rph phl phr lphr rphl", 356 "q", "bq dq gq kq pq tq", 357 "qu", "cqu lqu rqu", 358 "r", "rb rc rch rd rf rg rj rk rm rn rp rph rs rsh rst rt rth rsc rsk rsp rv rz br br br lbr rbl cr cr cr lcr rcl fr fr fr lfr rfl gr gr gr lgr rgl kr kr kr lkr rkl pr pr pr lpr rpl phr phr phr lphr rphl shr shr shr lshr rshl rsl sktr sctr skdr scdr skbr scbr skpr scpr dr dr dr rdr ldr tr tr tr rtr ltr vr rx zr zdr ztr zgr zkr ntr ntr ndr ngr mscr mshr mskr nscr ngscr ndscr nskr ngskr ndskr rr", 359 "s", "ls lst lsc lsk lsp rs rst rsc rsk rsp sl rsl lsl sktr sctr 
skdr scdr skbr scbr skpr scpr nsl msl msc mscr mst msp msk mskr nsl nsc nscr ngscr ndscr nsk nskr ngskr ndskr nst ndst nsp lsh rsh sh shl shqu shk shp shm shn shr shw shpl lshr rshl msh mshr ss", 360 "sh", "lsh rsh sh shl shqu shk shp shm shn shr shw shpl lshr rshl msh mshr", 361 "t", "ct ft lst lt rst rt sktr sctr tk tr rtr ltr zt ztr ntr ntr mst nst ndst tq ltl rtl tt", 362 "th", "lth rth zth cth", 363 "tl", "ltl rtl", 364 "v", "lv rv vv vl vr", 365 "w", "bw chw dw fw gw hw khw mw nw pw sw shw tw thw w wr wy zw", 366 "x", "nx rx lx", 367 "y", "by dy fy gy hy ky my ny py sy ty vy wy zy", 368 "z", "lz rz zn zd zt zg zk zm zn zp zb zr zdr ztr zgr zkr zth zz"); 369 370 closeCons = Maker.makeOM("b", "b lb rb bs bz mb mbs bh bh lbh rbh mbh bb", 371 "bh", "bh lbh rbh mbh", 372 "c", "c ck cks lc rc cs cz ct cz cth sc", 373 "ch", "ch lch rch tch pch kch mch nch", 374 "d", "d ld rd ds dz dt dsh dth gd nd nds dh dh ldh rdh ndh dd", 375 "dh", "dh ldh rdh ndh", 376 "f", "f lf rf fs fz ft fsh ft fth ff", 377 "g", "g lg rg gs gz gd gsh gth ng ngs gg", 378 "h", "cth ch lch rch tch pch kch mch nch dsh dth fsh fth gsh gth h hs ksh kth psh pth ph ph ph ph ph ph lph rph phs pht phth", 379 "j", "j", 380 "k", "ck cks kch k lk rk ks kz kt ksh kth nk nks sk", 381 "kh", "kh", 382 "l", "lb lc lch ld lf lg lk l ls lz lp lph ll", 383 "m", "mch m ms mb mt mp mbs mps mz sm mm", 384 "n", "nch n ns nd nt nk nds nks nz ng ngs nn", 385 "p", "pch mp mps p lp rp ps pz pt psh pth sp sp ph lph rph phs pht phth", 386 "ph", "ph lph rph phs pht phth", 387 "q", "q", 388 "qu", "", 389 "r", "rb rc rch rd rf rg rk rp rph r rs rz", 390 "s", "bs cks cs ds fs gs hs ks ls ms mbs mps ns nds nks ngs ps phs rs s st sp st sp sc sk sm ts lsh rsh sh shk shp msh ss", 391 "sh", "lsh rsh sh shk shp msh", 392 "t", "ct ft tch dt ft kt mt nt pt pht st st t ts tz tt", 393 "th", "cth dth fth gth kth pth phth th ths", 394 "tl", "tl", 395 "v", "v", 396 "w", "", 397 "x", "x", 398 "y", "", 399 "z", "bz cz dz fz gz kz lz mz nz pz 
rz tz z zz"); 400 } 401 402 /* 403 * Removes accented characters from a string; if the "base" characters are non-English anyway then the result won't 404 * be an ASCII string, but otherwise it probably will be. 405 * <br> 406 * Credit to user hashable from http://stackoverflow.com/a/1215117 407 * 408 * @param str a string that may contain accented characters 409 * @return a string with all accented characters replaced with their (possibly ASCII) counterparts 410 * 411 public String removeAccents(String str) { 412 String alteredString = Normalizer.normalize(str, Normalizer.Form.NFD); 413 alteredString = diacritics.matcher(alteredString).replaceAll(""); 414 alteredString = alteredString.replace('æ', 'a'); 415 alteredString = alteredString.replace('œ', 'o'); 416 alteredString = alteredString.replace('Æ', 'A'); 417 alteredString = alteredString.replace('Œ', 'O'); 418 return alteredString; 419 }*/ 420 421 /** 422 * Removes accented Latin-script characters from a string; if the "base" characters are non-English anyway then the 423 * result won't be an ASCII string, but otherwise it probably will be. 
424 * 425 * @param str a string that may contain accented Latin-script characters 426 * @return a string with all accented characters replaced with their (possibly ASCII) counterparts 427 */ 428 public static CharSequence removeAccents(CharSequence str) { 429 CharSequence alteredString = str; 430 for (int i = 0; i < accentFinders.length; i++) { 431 alteredString = accentFinders[i].replace(alteredString); 432 } 433 return alteredString; 434 } 435 436 private FakeLanguageGen register(String languageName) { 437 summary = registry.size() + "@1"; 438 registry.put(languageName,this); 439 name = languageName; 440 return copy(); 441 } 442 443 private FakeLanguageGen summarize(String brief) { 444 summary = brief; 445 return this; 446 } 447 448 private static FakeLanguageGen lovecraft() { 449 return new FakeLanguageGen( 450 new String[]{"a", "i", "o", "e", "u", "a", "i", "o", "e", "u", "ia", "ai", "aa", "ei"}, 451 new String[]{}, 452 new String[]{"s", "t", "k", "n", "y", "p", "k", "l", "g", "gl", "th", "sh", "ny", "ft", "hm", "zvr", "cth"}, 453 new String[]{"h", "gl", "gr", "nd", "mr", "vr", "kr"}, 454 new String[]{"l", "p", "s", "t", "n", "k", "g", "x", "rl", "th", "gg", "gh", "ts", "lt", "rk", "kh", "sh", "ng", "shk"}, 455 new String[]{"aghn", "ulhu", "urath", "oigor", "alos", "'yeh", "achtal", "elt", "ikhet", "adzek", "agd"}, 456 new String[]{"'", "-"}, new int[]{1, 2, 3}, new double[]{6, 7, 2}, 457 0.4, 0.31, 0.07, 0.04, null, true); 458 } 459 /** 460 * Ia! Ia! Cthulhu Rl'yeh ftaghn! Useful for generating cultist ramblings or unreadable occult texts. You may want 461 * to consider mixing this with multiple other languages using {@link #mixAll(Object...)}; using some very different 462 * languages in low amounts relative to the amount used for this, like {@link #NAHUATL}, {@link #INUKTITUT}, 463 * {@link #SOMALI}, {@link #DEEP_SPEECH}, and {@link #INSECT} can alter the aesthetic of the generated text in ways 464 * that may help distinguish magic styles. 
465 * <br> 466 * Zvrugg pialuk, ya'as irlemrugle'eith iposh hmo-es nyeighi, glikreirk shaivro'ei! 467 */ 468 public static final FakeLanguageGen LOVECRAFT = lovecraft().register("Lovecraft"); 469 private static FakeLanguageGen english() { 470 return new FakeLanguageGen( 471 new String[]{ 472 "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u", 473 "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u", 474 "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u", 475 "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u", 476 "au", "ai", "ai", "ou", "ea", "ie", "io", "ei", 477 }, 478 new String[]{"u", "u", "oa", "oo", "oo", "oo", "ee", "ee", "ee", "ee",}, 479 new String[]{ 480 "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n", 481 "p", "pl", "pr", "qu", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "thr", "v", "w", "y", "z", 482 "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n", 483 "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y", 484 "b", "br", "c", "ch", "d", "dr", "f", "g", "h", "j", "l", "m", "n", 485 "p", "r", "s", "sh", "st", "sl", "t", "tr", "th", 486 "b", "d", "f", "g", "h", "l", "m", "n", 487 "p", "r", "s", "sh", "t", "th", 488 "b", "d", "f", "g", "h", "l", "m", "n", 489 "p", "r", "s", "sh", "t", "th", 490 "r", "s", "t", "l", "n", 491 "str", "spr", "spl", "wr", "kn", "kn", "gn", 492 }, 493 new String[]{"x", "cst", "bs", "ff", "lg", "g", "gs", 494 "ll", "ltr", "mb", "mn", "mm", "ng", "ng", "ngl", "nt", "ns", "nn", "ps", "mbl", "mpr", 495 "pp", "ppl", "ppr", "rr", "rr", "rr", "rl", "rtn", "ngr", "ss", "sc", "rst", "tt", "tt", "ts", "ltr", "zz" 496 }, 497 new String[]{"b", "rb", "bb", "c", "rc", "ld", "d", "ds", "dd", "f", "ff", "lf", "rf", "rg", "gs", "ch", "lch", "rch", "tch", 498 "ck", "ck", "lk", "rk", "l", "ll", "lm", 
"m", "rm", "mp", "n", "nk", "nch", "nd", "ng", "ng", "nt", "ns", "lp", "rp", 499 "p", "r", "rn", "rts", "s", "s", "s", "s", "ss", "ss", "st", "ls", "t", "t", "ts", "w", "wn", "x", "ly", "lly", "z", 500 "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "t", "w", 501 }, 502 new String[]{"ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y", 503 "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y", 504 "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y", 505 "ay", "ay", "ey", "oy", "ay", "ay", "ey", "oy", 506 "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition", 507 "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition", 508 "ily", "ily", "ily", "adly", "owly", "oorly", "ardly", "iedly", 509 }, 510 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{10, 11, 4, 1}, 511 0.22, 0.1, 0.0, 0.22, englishSanityChecks, true); 512 } 513 /** 514 * Imitation English; may seem closer to Dutch in some generated text, and is not exactly the best imitation. 515 * Should seem pretty fake to many readers; does not filter out dictionary words but does perform basic vulgarity 516 * filtering. If you want to avoid generating other words, you can subclass FakeLanguageGen and modify word() . 517 * <br> 518 * Mont tiste frot; mousation hauddes? 519 * Lily wrely stiebes; flarrousseal gapestist. 
520 */ 521 public static final FakeLanguageGen ENGLISH = english().register("English"); 522 523 private static FakeLanguageGen greekRomanized(){ 524 return new FakeLanguageGen( 525 new String[]{"a", "a", "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "i", "i", "au", "ai", "ai", "oi", "oi", 526 "ia", "io", "u", "u", "eo", "ei", "o", "o", "ou", "oi", "y", "y", "y", "y"}, 527 new String[]{"ui", "ui", "ei", "ei"}, 528 new String[]{"rh", "s", "z", "t", "t", "k", "ch", "n", "th", "kth", "m", "p", "ps", "b", "l", "kr", 529 "g", "phth", "d", "t", "k", "ch", "n", "ph", "ph", "k",}, 530 new String[]{"lph", "pl", "l", "l", "kr", "nch", "nx", "ps"}, 531 new String[]{"s", "p", "t", "ch", "n", "m", "s", "p", "t", "ch", "n", "m", "b", "g", "st", "rst", 532 "rt", "sp", "rk", "ph", "x", "z", "nk", "ng", "th", "d", "k", "n", "n",}, 533 new String[]{"os", "os", "os", "is", "is", "us", "um", "eum", "ium", "iam", "us", "um", "es", 534 "anes", "eros", "or", "or", "ophon", "on", "on", "ikon", "otron", "ik",}, 535 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 7, 4, 1}, 0.45, 0.45, 0.0, 0.2, null, true); 536 } 537 538 /** 539 * Imitation ancient Greek, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers. 540 * <br> 541 * Psuilas alor; aipeomarta le liaspa... 
542 */ 543 public static final FakeLanguageGen GREEK_ROMANIZED = greekRomanized().register("Greek Romanized"); 544 private static FakeLanguageGen greekAuthentic(){ 545 return new FakeLanguageGen( 546 new String[]{"α", "α", "α", "α", "α", "ο", "ο", "ε", "ε", "ε", "ι", "ι", "ι", "ι", "ι", "αυ", "αι", "αι", "οι", "οι", 547 "ια", "ιο", "ου", "ου", "εο", "ει", "ω", "ω", "ωυ", "ωι", "υ", "υ", "υ", "υ"}, 548 new String[]{"υι", "υι", "ει", "ει"}, 549 new String[]{"ρ", "σ", "ζ", "τ", "τ", "κ", "χ", "ν", "θ", "κθ", "μ", "π", "ψ", "β", "λ", "κρ", 550 "γ", "φθ", "δ", "τ", "κ", "χ", "ν", "φ", "φ", "κ",}, 551 new String[]{"λφ", "πλ", "λ", "λ", "κρ", "γχ", "γξ", "ψ"}, 552 new String[]{"σ", "π", "τ", "χ", "ν", "μ", "σ", "π", "τ", "χ", "ν", "μ", "β", "γ", "στ", "ρστ", 553 "ρτ", "σπ", "ρκ", "φ", "ξ", "ζ", "γκ", "γγ", "θ", "δ", "κ", "ν", "ν",}, 554 new String[]{"ος", "ος", "ος", "ις", "ις", "υς", "υμ", "ευμ", "ιυμ", "ιαμ", "υς", "υμ", "ες", 555 "ανες", "ερος", "ορ", "ορ", "οφον", "ον", "ον", "ικον", "οτρον", "ικ",}, 556 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 7, 4, 1}, 0.45, 0.45, 0.0, 0.2, null, true); 557 } 558 /** 559 * Imitation ancient Greek, using the original Greek alphabet. People may try to translate it and get gibberish. 560 * Make sure the font you use to render this supports the Greek alphabet! In the GDX display module, most 561 * fonts support all the Greek you need for this. 562 * <br> 563 * Ψυιλασ αλορ; αιπεομαρτα λε λιασπα... 
564 */ 565 public static final FakeLanguageGen GREEK_AUTHENTIC = greekAuthentic().register("Greek Authentic"); 566 567 private static FakeLanguageGen french(){ 568 return new FakeLanguageGen( 569 new String[]{"a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o", 570 "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o", 571 "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 572 "ai", "oi", "oui", "au", "œu", "ou" 573 }, 574 new String[]{ 575 "ai", "aie", "aou", "eau", "oi", "oui", "oie", "eu", "eu", 576 "à", "â", "ai", "aî", "aï", "aie", "aou", "aoû", "au", "ay", "e", "é", "ée", "è", 577 "ê", "eau", "ei", "eî", "eu", "eû", "i", "î", "ï", "o", "ô", "oe", "oê", "oë", "œu", 578 "oi", "oie", "oï", "ou", "oû", "oy", "u", "û", "ue", 579 "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o", 580 "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 581 "a", "a", "a", "e", "e", "e", "i", "i", "o", "u", "a", "a", "a", "e", "e", "e", "i", "i", "o", 582 "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 583 "ai", "ai", "eau", "oi", "oi", "oui", "eu", "au", "au", "ei", "ei", "oe", "oe", "ou", "ou", "ue" 584 }, 585 new String[]{"tr", "ch", "m", "b", "b", "br", "j", "j", "j", "j", "g", "t", "t", "t", "c", "d", "f", "f", "h", "n", "l", "l", 586 "s", "s", "s", "r", "r", "r", "v", "v", "p", "pl", "pr", "bl", "br", "dr", "gl", "gr"}, 587 new String[]{"cqu", "gu", "qu", "rqu", "nt", "ng", "ngu", "mb", "ll", "nd", "ndr", "nct", "st", 588 "xt", "mbr", "pl", "g", "gg", "ggr", "gl", "bl", "j", "gn", 589 "m", "m", "mm", "v", "v", "f", "f", "f", "ff", "b", "b", "bb", "d", "d", "dd", "s", "s", "s", "ss", "ss", "ss", 590 "cl", "cr", "ng", "ç", "ç", "rç", "rd", "lg", "rg"}, 591 new String[]{"rt", "ch", "m", "b", "b", "lb", "t", "t", "t", "t", "c", "d", "f", "f", "n", "n", "l", "l", 592 "s", "s", "s", "r", 
"r", "p", "rd", "ff", "ss", "ll" 593 }, 594 new String[]{"e", "e", "e", "e", "e", "é", "é", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es", "es", 595 "e", "e", "e", "e", "e", "é", "é", "er", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es", 596 "e", "e", "e", "e", "e", "é", "é", "é", "er", "er", "er", "er", "er", "es", "es", "es", "es", "es", 597 "ent", "em", "en", "en", "aim", "ain", "an", "oin", "ien", "iere", "ors", "anse", 598 "ombs", "ommes", "ancs", "ends", "œufs", "erfs", "ongs", "aps", "ats", "ives", "ui", "illes", 599 "aen", "aon", "am", "an", "eun", "ein", "age", "age", "uile", "uin", "um", "un", "un", "un", 600 "aille", "ouille", "eille", "ille", "eur", "it", "ot", "oi", "oi", "oi", "aire", "om", "on", "on", 601 "im", "in", "in", "ien", "ien", "ine", "ion", "il", "eil", "oin", "oint", "iguïté", "ience", "incte", 602 "ang", "ong", "acré", "eau", "ouche", "oux", "oux", "ect", "ecri", "agne", "uer", "aix", "eth", "ut", "ant", 603 "anc", "anc", "anche", "ioche", "eaux", "ive", "eur", "ancois", "ecois", "ente", "enri", 604 "arc", "oc", "ouis", "arche", "ique", "ique", "ique", "oque", "arque", "uis", "este", "oir", "oir" 605 }, 606 new String[]{}, new int[]{1, 2, 3}, new double[]{15, 7, 2}, 0.35, 1.0, 0.0, 0.4, null, true); 607 } 608 /** 609 * Imitation modern French, using the (many) accented vowels that are present in the language. Translating it 610 * will produce gibberish if it produces anything at all. In the GDX display module, most 611 * fonts support all the accented characters you need for this. 612 * <br> 613 * Bœurter; ubi plaqua se saigui ef brafeur? 
614 */ 615 public static final FakeLanguageGen FRENCH = french().register("French"); 616 617 private static FakeLanguageGen russianRomanized(){ 618 return new FakeLanguageGen( 619 new String[]{"a", "e", "e", "i", "i", "o", "u", "ie", "y", "e", "iu", "ia", "y", "a", "a", "o", "u"}, 620 new String[]{}, 621 new String[]{"b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f", "kh", "ts", 622 "b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f", "kh", "ts", 623 "b", "v", "g", "d", "k", "l", "p", "r", "s", "t", "f", 624 "zh", "m", "n", "z", "ch", "sh", "shch", 625 "br", "sk", "tr", "bl", "gl", "kr", "gr"}, 626 new String[]{"bl", "br", "pl", "dzh", "tr", "gl", "gr", "kr"}, 627 new String[]{"b", "v", "g", "d", "zh", "z", "k", "l", "m", "n", "p", "r", "s", "t", "f", "kh", "ts", "ch", "sh", 628 "v", "f", "sk", "sk", "sk", "s", "b", "d", "d", "n", "r", "r"}, 629 new String[]{"odka", "odna", "usk", "ask", "usky", "ad", "ar", "ovich", "ev", "ov", "of", "agda", "etsky", "ich", "on", "akh", "iev", "ian"}, 630 new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{4, 5, 6, 5, 3, 1}, 0.1, 0.2, 0.0, 0.12, englishSanityChecks, true); 631 } 632 /** 633 * Imitation modern Russian, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers. 634 * <br> 635 * Zhydotuf ruts pitsas, gogutiar shyskuchebab - gichapofeglor giunuz ieskaziuzhin. 
636 */ 637 public static final FakeLanguageGen RUSSIAN_ROMANIZED = russianRomanized().register("Russian Romanized"); 638 639 private static FakeLanguageGen russianAuthentic(){ 640 return new FakeLanguageGen( 641 new String[]{"а", "е", "ё", "и", "й", "о", "у", "ъ", "ы", "э", "ю", "я", "ы", "а", "а", "о", "у"}, 642 new String[]{}, 643 new String[]{"б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф", "х", "ц", 644 "б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф", "х", "ц", 645 "б", "в", "г", "д", "к", "л", "п", "р", "с", "т", "ф", 646 "ж", "м", "н", "з", "ч", "ш", "щ", 647 "бр", "ск", "тр", "бл", "гл", "кр", "гр"}, 648 new String[]{"бл", "бр", "пл", "дж", "тр", "гл", "гр", "кр"}, 649 new String[]{"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н", "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", 650 "в", "ф", "ск", "ск", "ск", "с", "б", "д", "д", "н", "р", "р"}, 651 new String[]{"одка", "одна", "уск", "аск", "ускы", "ад", "ар", "овйч", "ев", "ов", "оф", "агда", "ёцкы", "йч", "он", "ах", "ъв", "ян"}, 652 new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{4, 5, 6, 5, 3, 1}, 0.1, 0.2, 0.0, 0.12, null, true); 653 } 654 /** 655 * Imitation modern Russian, using the authentic Cyrillic alphabet used in Russia and other countries. 656 * Make sure the font you use to render this supports the Cyrillic alphabet! 657 * In the GDX display module, the "smooth" fonts support all the Cyrillic alphabet you need for this. 658 * <br> 659 * Жыдотуф руц пйцас, гогутяр шыскучэбаб - гйчапофёглор гюнуз ъсказюжин. 
660 */ 661 public static final FakeLanguageGen RUSSIAN_AUTHENTIC = russianAuthentic().register("Russian Authentic"); 662 663 private static FakeLanguageGen japaneseRomanized(){ 664 return new FakeLanguageGen( 665 new String[]{"a", "a", "a", "a", "e", "e", "i", "i", "i", "i", "o", "o", "o", "u", "ou", "u", "ai", "ai"}, 666 new String[]{}, 667 new String[]{"k", "ky", "s", "sh", "t", "ts", "ch", "n", "ny", "h", "f", "hy", "m", "my", "y", "r", "ry", "g", 668 "gy", "z", "j", "d", "b", "by", "p", "py", 669 "k", "t", "n", "s", "k", "t", "d", "s", "sh", "sh", "g", "r", "b", 670 "k", "t", "n", "s", "k", "t", "b", "s", "sh", "sh", "g", "r", "b", 671 "k", "t", "n", "s", "k", "t", "z", "s", "sh", "sh", "ch", "ry", "ts" 672 }, 673 new String[]{"k", "ky", "s", "sh", "t", "ts", "ch", "n", "ny", "h", "f", "hy", "m", "my", "y", "r", "ry", "g", 674 "gy", "z", "j", "d", "b", "by", "p", "py", 675 "k", "t", "d", "s", "k", "t", "d", "s", "sh", "sh", "y", "j", "p", "r", "d", 676 "k", "t", "b", "s", "k", "t", "b", "s", "sh", "sh", "y", "j", "p", "r", "d", 677 "k", "t", "z", "s", "f", "g", "z", "b", "d", "ts", "sh", "m", 678 "k", "t", "z", "s", "f", "g", "z", "b", "d", "ts", "sh", "m", 679 "nn", "nn", "nd", "nz", "mm", "kk", "tt", "ss", "ssh", "tch"}, 680 new String[]{"n"}, 681 new String[]{"ima", "aki", "aka", "ita", "en", "izen", "achi", "uke", "aido", "outsu", "uki", "oku", "aku", "oto", "okyo"}, 682 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 4, 5, 4, 3}, 0.3, 0.9, 0.0, 0.07, japaneseSanityChecks, true); 683 } 684 /** 685 * Imitation Japanese, romanized to use the Latin alphabet. Likely to seem pretty fake to many readers. 686 * <br> 687 * Narurehyounan nikase keho... 
688 */ 689 public static final FakeLanguageGen JAPANESE_ROMANIZED = japaneseRomanized().register("Japanese Romanized"); 690 691 private static FakeLanguageGen swahili(){ 692 return new FakeLanguageGen( 693 new String[]{"a", "i", "o", "e", "u", 694 "a", "a", "i", "o", "o", "e", "u", 695 "a", "a", "i", "o", "o", "u", 696 "a", "a", "i", "i", "o", 697 "a", "a", "a", "a", "a", 698 "a", "i", "o", "e", "u", 699 "a", "a", "i", "o", "o", "e", "u", 700 "a", "a", "i", "o", "o", "u", 701 "a", "a", "i", "i", "o", 702 "a", "a", "a", "a", "a", 703 "aa", "aa", "ue", "uo", "ii", "ea"}, 704 new String[]{}, 705 new String[]{ 706 "b", "h", "j", "l", "s", "y", "m", "n", 707 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 708 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 709 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 710 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 711 712 "b", "h", "j", "l", "s", "y", "m", "n", 713 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 714 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 715 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 716 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 717 718 "b", "h", "j", "l", "s", "y", "m", "n", 719 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 720 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 721 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 722 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 723 724 "b", "h", "j", "l", "s", "y", "m", "n", 725 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 726 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 727 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 728 "b", 
"ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 729 730 "nb", "nj", "ns", "nz", 731 "nb", "nch", "nj", "ns", "ny", "nz", 732 "nb", "nch", "nf", "ng", "nj", "nk", "np", "ns", "nz", 733 "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nz", 734 "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nv", "nw", "nz", 735 736 "mb", "ms", "my", "mz", 737 "mb", "mch", "ms", "my", "mz", 738 "mb", "mch", "mk", "mp", "ms", "my", "mz", 739 "mb", "mch", "md", "mk", "mp", "ms", "mt", "my", "mz", 740 "mb", "mch", "md", "mf", "mg", "mj", "mk", "mp", "ms", "mt", "mv", "mw", "my", "mz", 741 "sh", "sh", "sh", "ny", "kw", 742 "dh", "th", "sh", "ny", 743 "dh", "th", "sh", "gh", "r", "ny", 744 "dh", "th", "sh", "gh", "r", "ny", 745 }, 746 new String[]{ 747 "b", "h", "j", "l", "s", "y", "m", "n", 748 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 749 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 750 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 751 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 752 753 "b", "h", "j", "l", "s", "y", "m", "n", 754 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 755 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 756 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 757 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 758 759 "b", "h", "j", "l", "s", "y", "m", "n", 760 "b", "ch", "h", "j", "l", "s", "y", "z", "m", "n", 761 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 762 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 763 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 764 765 "b", "h", "j", "l", "s", "y", "m", "n", 766 "b", "ch", "h", "j", "l", "s", "y", 
"z", "m", "n", 767 "b", "ch", "f", "g", "h", "j", "k", "l", "p", "s", "y", "z", "m", "n", 768 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "y", "z", "m", "n", "kw", 769 "b", "ch", "d", "f", "g", "h", "j", "k", "l", "p", "s", "t", "v", "w", "y", "z", "m", "n", "kw", 770 771 "nb", "nj", "ns", "nz", 772 "nb", "nch", "nj", "ns", "ny", "nz", 773 "nb", "nch", "nf", "ng", "nj", "nk", "np", "ns", "nz", 774 "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nz", 775 "nb", "nch", "nd", "nf", "ng", "nj", "nk", "np", "ns", "nt", "nw", "nz", 776 777 "mb", "ms", "my", "mz", 778 "mb", "mch", "ms", "my", "mz", 779 "mb", "mch", "mk", "mp", "ms", "my", "mz", 780 "mb", "mch", "md", "mk", "mp", "ms", "mt", "my", "mz", 781 "mb", "mch", "md", "mf", "mg", "mj", "mk", "mp", "ms", "mt", "mw", "my", "mz", 782 "sh", "sh", "sh", "ny", "kw", 783 "dh", "th", "sh", "ny", 784 "dh", "th", "sh", "gh", "r", "ny", 785 "dh", "th", "sh", "gh", "r", "ny", 786 "ng", "ng", "ng", "ng", "ng" 787 }, 788 new String[]{""}, 789 new String[]{"-@"}, 790 new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{3, 8, 6, 9, 2, 2}, 0.2, 1.0, 0.0, 0.12, null, true); 791 } 792 /** 793 * Swahili is one of the more commonly-spoken languages in sub-Saharan Africa, and serves mainly as a shared language 794 * that is often learned after becoming fluent in one of many other (vaguely-similar) languages of the area. An 795 * example sentence in Swahili, that this might try to imitate aesthetically, is "Mtoto mdogo amekisoma," meaning 796 * "The small child reads it" (where it is a book). A notable language feature used here is the redoubling of words, 797 * which is used in Swahili to emphasize or alter the meaning of the doubled word; here, it always repeats exactly 798 * and can't make minor changes like a real language might. This generates things like "gata-gata", "hapi-hapi", and 799 * "mimamzu-mimamzu", always separating with a hyphen here. 
800 * <br> 801 * As an aside, please try to avoid the ugly stereotypes that fantasy media often assigns to speakers of African-like 802 * languages when using this or any of the generators. Many fantasy tropes come from older literature written with 803 * major cultural biases, and real-world cultural elements can be much more interesting to players than yet another 804 * depiction of a "jungle savage" with stereotypical traits. Consider drawing from existing lists of real-world 805 * technological discoveries, like https://en.wikipedia.org/wiki/History_of_science_and_technology_in_Africa , for 806 * inspiration when world-building; though some groups may not have developed agriculture by early medieval times, 807 * their neighbors may be working iron and studying astronomy just a short distance away. 808 * <br> 809 * Kondueyu; ma mpiyamdabota mise-mise nizakwaja alamsa amja, homa nkajupomba. 810 */ 811 public static final FakeLanguageGen SWAHILI = swahili().register("Swahili"); 812 813 private static FakeLanguageGen somali(){ 814 return new FakeLanguageGen( 815 new String[]{"a", "a", "a", "a", "a", "a", "a", "aa", "aa", "aa", 816 "e", "e", "ee", 817 "i", "i", "i", "i", "ii", 818 "o", "o", "o", "oo", 819 "u", "u", "u", "uu", "uu", 820 }, 821 new String[]{}, 822 new String[]{"b", "t", "j", "x", "kh", "d", "r", "s", "sh", "dh", "c", "g", "f", "q", "k", "l", "m", 823 "n", "w", "h", "y", 824 "x", "g", "b", "d", "s", "m", "dh", "n", "r", 825 "g", "b", "s", "dh", 826 }, 827 new String[]{ 828 "bb", "gg", "dd", "bb", "dd", "rr", "ddh", "cc", "gg", "ff", "ll", "mm", "nn", 829 "bb", "gg", "dd", "bb", "dd", "gg", 830 "bb", "gg", "dd", "bb", "dd", "gg", 831 "cy", "fk", "ft", "nt", "rt", "lt", "qm", "rdh", "rsh", "lq", 832 "my", "gy", "by", "lkh", "rx", "md", "bd", "dg", "fd", "mf", 833 "dh", "dh", "dh", "dh", 834 }, 835 new String[]{ 836 "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n", "h", 837 "x", "g", "b", "d", "s", "m", "q", "n", "r", 
838 "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n", "h", 839 "x", "g", "b", "d", "s", "m", "q", "n", "r", 840 "b", "t", "j", "x", "kh", "d", "r", "s", "sh", "c", "g", "f", "q", "k", "l", "m", "n", 841 "g", "b", "d", "s", "q", "n", "r", 842 "b", "t", "x", "kh", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n", 843 "g", "b", "d", "s", "r", "n", 844 "b", "t", "kh", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n", 845 "g", "b", "d", "s", "r", "n", 846 "b", "t", "d", "r", "s", "sh", "g", "f", "q", "k", "l", "m", "n", 847 "g", "b", "d", "s", "r", "n", 848 }, 849 new String[]{"aw", "ow", "ay", "ey", "oy", "ay", "ay"}, 850 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 4, 5, 4, 1}, 0.25, 0.3, 0.0, 0.08, null, true); 851 } 852 /** 853 * Imitation Somali, using the Latin alphabet. Due to uncommon word structure, unusual allowed combinations of 854 * letters, and no common word roots with most familiar languages, this may seem like an unidentifiable or "alien" 855 * language to most readers. However, it's based on the Latin writing system for the Somali language (probably 856 * closest to the northern dialect), which due to the previously mentioned properties, makes it especially good for 857 * mixing with other languages to make letter combinations that seem strange to appear. It is unlikely that this 858 * particular generated language style will be familiar to readers, so it probably won't have existing stereotypes 859 * associated with the text. One early comment this received was, "it looks like a bunch of letters semi-randomly 860 * thrown together", which is probably a typical response (the comment was made by someone fluent in German and 861 * English, and most Western European languages are about as far as you can get from Somali). 862 * <br> 863 * Libor cat naqoxekh dhuugad gisiqir? 
864 */ 865 public static final FakeLanguageGen SOMALI = somali().register("Somali"); 866 private static FakeLanguageGen hindi(){ 867 return new FakeLanguageGen( 868 new String[]{ 869 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī", 870 "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 871 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī", 872 "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 873 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī", 874 "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 875 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "ī", 876 "u", "u", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 877 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī", 878 "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 879 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī", 880 "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 881 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī", 882 "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 883 "a", "a", "a", "a", "a", "a", "ā", "ā", "i", "i", "i", "i", "ī", "i", "i", "ī", "ī", 884 "u", "u", "u", "ū", "u", "ū", "u", "ū", "e", "ai", "ai", "o", "o", "o", "au", 885 "aĕ", "aĕ", "aĕ", "aĕ", "aĕ", "āĕ", "āĕ", "iĕ", "iĕ", "iĕ", "īĕ", "īĕ", 886 "uĕ", "uĕ", "ūĕ", "aiĕ", "aiĕ", "oĕ", "oĕ", "oĕ", "auĕ", 887 //"aĭ", "aĭ", "aĭ", "aĭ", "aĭ", "āĭ", "āĭ", "iĭ", "iĭ", "iĭ", "īĭ", "īĭ", 888 //"uĭ", "uĭ", "ūĭ", "aiĭ", "aiĭ", "oĭ", "oĭ", "oĭ", "auĭ", 889 }, 890 new String[]{"á", "í", "ú", "ó", "á", "í", "ú", "ó", 891 }, 892 new String[]{ 893 "k", "k", "k", "k", "k", "k", "k", "k", "kŗ", "kŕ", "kļ", 894 "c", "c", "c", "c", "c", "c", "cŗ", "cŕ", "cļ", 895 "ţ", "t", "t", "t", "t", "t", "t", "t", "t", "t", "tŗ", "tŕ", "tŗ", "tŕ", 896 "p", "p", "p", "p", "p", "p", 
"p", "p", "p", "p", "pŗ", "pŕ", "pļ", "pĺ", "pŗ", "pŕ", "p", "p", 897 "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "kh", "khŗ", "khŕ", "khļ", "khĺ", 898 "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "chŗ", "chŕ", "chļ", "chĺ", 899 "ţh", "th", "th", "th", "th", "th", "th", "th", "th", "th", "thŗ", "thŕ", "thļ", "thĺ", 900 "ph", "ph", "ph", "ph", "ph", "ph", "ph", "phŗ", "phŕ", "phļ", "phĺ", 901 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 902 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 903 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 904 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 905 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 906 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 907 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 908 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 909 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 910 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 911 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 912 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 913 "g", "j", "đ", "d", "b", "gh", "jh", "đh", "dh", "bh", 914 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 915 "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh", 916 "ń", "ñ", "ņ", "n", "m", "h", "y", "r", "l", "v", "ś", "ş", "s", 917 "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh", 918 "ń", "ņ", "n", "m", "h", "y", "r", "l", "v", "ş", "s", 919 "g", "j", "đ", "d", "b", "gh", "đh", "dh", "bh", 920 "ń", "ņ", "n", "m", "h", "y", "r", "l", "v", "ş", "s", 921 "g", "đ", "d", "b", "gh", "đh", "dh", "bh", "n", "m", "v", "s", 922 "g", "đ", "d", "b", "g", "d", "b", "dh", "bh", "n", "m", "v", 923 "g", "đ", "d", "b", "g", "d", "b", "dh", "bh", "n", "m", "v", 924 }, 925 new String[]{ 926 "k", "k", "k", "k", "k", "nk", "rk", 927 "k", "k", "k", "k", "k", "nk", "rk", 928 "k", "k", "k", "k", "k", "nk", "rk", 
929 "k", "k", "k", "k", "k", "nk", "rk", 930 "k", "k", "k", "k", "k", "nk", "rk", 931 "k", "k", "k", "k", "k", "nk", "rk", 932 "k", "k", "k", "k", "k", "nk", "rk", 933 "k", "k", "k", "k", "k", "nk", "rk", 934 "kŗ", "kŗ", "kŗ", "kŗ", "kŗ", "nkŗ", "rkŗ", 935 "kŕ", "kŕ", "kŕ", "kŕ", "kŕ", "nkŕ", "rkŕ", 936 "kļ", "kļ", "kļ", "kļ", "kļ", "nkļ", "rkļ", 937 938 "c", "c", "c", "c", "c", "c", "cŗ", "cŕ", "cļ", 939 "ţ", "t", "t", "t", "t", "t", "nt", "rt", 940 "ţ", "t", "t", "t", "t", "nt", "rt", 941 "ţ", "t", "t", "t", "t", "nt", "rt", 942 "ţ", "t", "t", "t", "t", "nt", "rt", 943 "ţ", "t", "t", "t", "t", "nt", "rt", 944 "ţ", "t", "t", "t", "t", "nt", "rt", 945 "ţ", "t", "t", "t", "t", "nt", "rt", 946 "ţ", "t", "t", "t", "t", "nt", "rt", 947 "ţ", "t", "t", "t", "t", "nt", "rt", 948 "tŗ", "tŗ", "tŗ", "tŗ", "tŗ", "ntŗ", "rtŗ", 949 "tŕ", "tŕ", "tŕ", "tŕ", "tŕ", "ntŕ", "rtŕ", 950 "tŗ", "tŗ", "tŗ", "tŗ", "tŗ", "ntŗ", "rtŗ", 951 "tŕ", "tŕ", "tŕ", "tŕ", "tŕ", "ntŕ", "rtŕ", 952 953 "p", "p", "p", "p", "p", "np", "rp", 954 "p", "p", "p", "p", "p", "np", "rp", 955 "p", "p", "p", "p", "p", "np", "rp", 956 "p", "p", "p", "p", "p", "np", "rp", 957 "p", "p", "p", "p", "p", "np", "rp", 958 "p", "p", "p", "p", "p", "np", "rp", 959 "p", "p", "p", "p", "p", "np", "rp", 960 "p", "p", "p", "p", "p", "np", "rp", 961 "p", "p", "p", "p", "p", "np", "rp", 962 "p", "p", "p", "p", "p", "np", "rp", 963 "pŗ", "pŗ", "pŗ", "pŗ", "pŗ", "npŗ", "rpŗ", 964 "pŕ", "pŕ", "pŕ", "pŕ", "pŕ", "npŕ", "rpŕ", 965 "pļ", "pļ", "pļ", "pļ", "pļ", "npļ", "rpļ", 966 "pĺ", "pĺ", "pĺ", "pĺ", "pĺ", "npĺ", "rpĺ", 967 "pŗ", "pŗ", "pŗ", "pŗ", "pŗ", "npŗ", "rpŗ", 968 "pŕ", "pŕ", "pŕ", "pŕ", "pŕ", "npŕ", "rpŕ", 969 "p", "p", "p", "p", "p", "np", "rp", 970 "p", "p", "p", "p", "p", "np", "rp", 971 972 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 973 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 974 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 975 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 976 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 
977 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 978 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 979 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 980 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 981 "kh", "kh", "kh", "kh", "kh", "nkh", "rkh", 982 "khŗ", "khŗ", "khŗ", "khŗ", "khŗ", "nkhŗ", "rkhŗ", 983 "khŕ", "khŕ", "khŕ", "khŕ", "khŕ", "nkhŕ", "rkhŕ", 984 "khļ", "khļ", "khļ", "khļ", "khļ", "nkhļ", "rkhļ", 985 "khĺ", "khĺ", "khĺ", "khĺ", "khĺ", "nkhĺ", "rkhĺ", 986 987 "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "ch", "chŗ", "chŕ", "chļ", "chĺ", 988 "ţh", "th", "th", "th", "th", "th", "nth", "rth", 989 "th", "th", "th", "th", "th", "nth", "rth", 990 "th", "th", "th", "th", "th", "nth", "rth", 991 "th", "th", "th", "th", "th", "nth", "rth", 992 "th", "th", "th", "th", "th", "nth", "rth", 993 "th", "th", "th", "th", "th", "nth", "rth", 994 "th", "th", "th", "th", "th", "nth", "rth", 995 "th", "th", "th", "th", "th", "nth", "rth", 996 "th", "th", "th", "th", "th", "nth", "rth", 997 "thŗ", "thŗ", "thŗ", "thŗ", "thŗ", "nthŗ", "rthŗ", 998 "thŕ", "thŕ", "thŕ", "thŕ", "thŕ", "nthŕ", "rthŕ", 999 "thļ", "thļ", "thļ", "thļ", "thļ", "nthļ", "rthļ", 1000 "thĺ", "thĺ", "thĺ", "thĺ", "thĺ", "nthĺ", "rthĺ", 1001 1002 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1003 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1004 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1005 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1006 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1007 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1008 "ph", "ph", "ph", "ph", "ph", "nph", "rph", 1009 "phŗ", "phŗ", "phŗ", "phŗ", "phŗ", "nphŗ", "rphŗ", 1010 "phŕ", "phŕ", "phŕ", "phŕ", "phŕ", "nphŕ", "rphŕ", 1011 "phļ", "phļ", "phļ", "phļ", "phļ", "nphļ", "rphļ", 1012 "phĺ", "phĺ", "phĺ", "phĺ", "phĺ", "nphĺ", "rphĺ", 1013 1014 "g", "g", "g", "g", "g", "ng", "rg", 1015 "j", "j", "j", "j", "j", "nj", "rj", 1016 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1017 "d", "d", "d", "d", "d", "nd", "rd", 1018 "b", "b", "b", "b", "b", "nb", "rb", 
1019 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1020 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1021 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1022 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1023 1024 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1025 "h", "y", "y", "y", "y", "y", "ny", "ry", 1026 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1027 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1028 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1029 "s", "s", "s", "s", "s", "ns", "rs", 1030 1031 "g", "g", "g", "g", "g", "ng", "rg", 1032 "j", "j", "j", "j", "j", "nj", "rj", 1033 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1034 "d", "d", "d", "d", "d", "nd", "rd", 1035 "b", "b", "b", "b", "b", "nb", "rb", 1036 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1037 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1038 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1039 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1040 1041 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1042 "h", "y", "y", "y", "y", "y", "ny", "ry", 1043 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1044 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1045 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1046 "s", "s", "s", "s", "s", "ns", "rs", 1047 1048 "g", "g", "g", "g", "g", "ng", "rg", 1049 "j", "j", "j", "j", "j", "nj", "rj", 1050 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1051 "d", "d", "d", "d", "d", "nd", "rd", 1052 "b", "b", "b", "b", "b", "nb", "rb", 1053 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1054 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1055 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1056 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1057 1058 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1059 "h", "y", "y", "y", "y", "y", "ny", "ry", 1060 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1061 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1062 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1063 "s", "s", "s", "s", "s", "ns", "rs", 1064 1065 "g", "g", "g", "g", "g", "ng", "rg", 1066 "j", "j", "j", "j", "j", "nj", 
"rj", 1067 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1068 "d", "d", "d", "d", "d", "nd", "rd", 1069 "b", "b", "b", "b", "b", "nb", "rb", 1070 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1071 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1072 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1073 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1074 1075 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1076 "h", "y", "y", "y", "y", "y", "ny", "ry", 1077 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1078 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1079 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1080 "s", "s", "s", "s", "s", "ns", "rs", 1081 1082 "g", "g", "g", "g", "g", "ng", "rg", 1083 "j", "j", "j", "j", "j", "nj", "rj", 1084 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1085 "d", "d", "d", "d", "d", "nd", "rd", 1086 "b", "b", "b", "b", "b", "nb", "rb", 1087 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1088 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1089 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1090 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1091 1092 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1093 "h", "y", "y", "y", "y", "y", "ny", "ry", 1094 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1095 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1096 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1097 "s", "s", "s", "s", "s", "ns", "rs", 1098 1099 "g", "g", "g", "g", "g", "ng", "rg", 1100 "j", "j", "j", "j", "j", "nj", "rj", 1101 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1102 "d", "d", "d", "d", "d", "nd", "rd", 1103 "b", "b", "b", "b", "b", "nb", "rb", 1104 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1105 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1106 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1107 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1108 1109 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1110 "h", "y", "y", "y", "y", "y", "ny", "ry", 1111 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1112 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1113 "ş", "ş", "ş", "ş", "ş", "nş", 
"rş", 1114 "s", "s", "s", "s", "s", "ns", "rs", 1115 1116 "g", "g", "g", "g", "g", "ng", "rg", 1117 "j", "j", "j", "j", "j", "nj", "rj", 1118 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1119 "d", "d", "d", "d", "d", "nd", "rd", 1120 "b", "b", "b", "b", "b", "nb", "rb", 1121 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1122 "jh", "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1123 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1124 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1125 1126 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1127 "h", "y", "y", "y", "y", "y", "ny", "ry", 1128 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1129 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1130 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1131 "s", "s", "s", "s", "s", "ns", "rs", 1132 1133 "g", "g", "g", "g", "g", "ng", "rg", 1134 "j", "j", "j", "j", "j", "nj", "rj", 1135 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1136 "d", "d", "d", "d", "d", "nd", "rd", 1137 "b", "b", "b", "b", "b", "nb", "rb", 1138 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1139 "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1140 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1141 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1142 1143 "ń", "ñ", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1144 "h", "y", "y", "y", "y", "y", "ny", "ry", 1145 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1146 "ś", "ś", "ś", "ś", "ś", "nś", "rś", 1147 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1148 "s", "s", "s", "s", "s", "ns", "rs", 1149 1150 "g", "g", "g", "g", "g", "ng", "rg", 1151 "j", "j", "j", "j", "j", "nj", "rj", 1152 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1153 "d", "d", "d", "d", "d", "nd", "rd", 1154 "b", "b", "b", "b", "b", "nb", "rb", 1155 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1156 "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1157 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1158 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1159 1160 "ń", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1161 "h", "y", "y", "y", "y", "y", "ny", "ry", 1162 "r", "l", "v", 
"v", "v", "v", "v", "nv", "rv", 1163 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1164 "s", "s", "s", "s", "s", "ns", "rs", 1165 1166 "g", "g", "g", "g", "g", "ng", "rg", 1167 "j", "j", "j", "j", "j", "nj", "rj", 1168 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1169 "d", "d", "d", "d", "d", "nd", "rd", 1170 "b", "b", "b", "b", "b", "nb", "rb", 1171 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1172 "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1173 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1174 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1175 1176 "ń", "ņ", "n", "m", "m", "m", "m", "m", "nm", "rm", 1177 "h", "y", "y", "y", "y", "y", "ny", "ry", 1178 "r", "l", "v", "v", "v", "v", "v", "nv", "rv", 1179 "ş", "ş", "ş", "ş", "ş", "nş", "rş", 1180 "s", "s", "s", "s", "s", "ns", "rs", 1181 1182 "g", "g", "g", "g", "g", "ng", "rg", 1183 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1184 "d", "d", "d", "d", "d", "nd", "rd", 1185 "b", "b", "b", "b", "b", "nb", "rb", 1186 "gh", "gh", "gh", "gh", "gh", "ngh", "rgh", 1187 "đh", "đh", "đh", "đh", "đh", "nđh", "rđh", 1188 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1189 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1190 "n", "m", "m", "m", "m", "m", "nm", "rm", 1191 "v", "v", "v", "v", "v", "nv", "rv", 1192 "s", "s", "s", "s", "s", "ns", "rs", 1193 1194 "g", "g", "g", "g", "g", "ng", "rg", 1195 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1196 "d", "d", "d", "d", "d", "nd", "rd", 1197 "b", "b", "b", "b", "b", "nb", "rb", 1198 "g", "g", "g", "g", "g", "ng", "rg", 1199 "d", "d", "d", "d", "d", "nd", "rd", 1200 "b", "b", "b", "b", "b", "nb", "rb", 1201 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1202 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1203 "n", "m", "m", "m", "m", "m", "nm", "rm", 1204 "v", "v", "v", "v", "v", "nv", "rv", 1205 1206 "g", "g", "g", "g", "g", "ng", "rg", 1207 "đ", "đ", "đ", "đ", "đ", "nđ", "rđ", 1208 "d", "d", "d", "d", "d", "nd", "rd", 1209 "b", "b", "b", "b", "b", "nb", "rb", 1210 "g", "g", "g", "g", "g", "ng", "rg", 1211 "d", "d", "d", "d", 
"d", "nd", "rd", 1212 "b", "b", "b", "b", "b", "nb", "rb", 1213 "dh", "dh", "dh", "dh", "dh", "ndh", "rdh", 1214 "bh", "bh", "bh", "bh", "bh", "nbh", "rbh", 1215 "n", "m", "m", "m", "m", "m", "nm", "rm", 1216 "v", "v", "v", "v", "v", "nv", "rv", 1217 }, 1218 new String[]{"t", "d", "m", "r", "dh", "b", "t", "d", "m", "r", "dh", "bh", "nt", "nt", "nk", "ş"}, 1219 new String[]{"it", "it", "ati", "adva", "aş", "arma", "ardha", "abi", "ab", "aya"}, 1220 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{1, 2, 3, 3, 1}, 0.15, 0.75, 0.0, 0.12, null, true); 1221 } 1222 /** 1223 * Imitation Hindi, romanized to use the Latin alphabet using accented glyphs similar to the IAST standard. 1224 * Most fonts do not support the glyphs that IAST-standard romanization of Hindi needs, so this uses alternate 1225 * glyphs from at most Latin Extended-A. Relative to the IAST standard, the glyphs {@code "ṛṝḷḹḍṭṅṇṣṃḥ"} become 1226 * {@code "ŗŕļĺđţńņşĕĭ"}, with the nth glyph in the first string being substituted with the nth glyph in the second 1227 * string. You may want to get a variant on this language with {@link #removeAccents()} if you can't display the 1228 * less-commonly-supported glyphs {@code āīūĕĭáíúóŗŕļţĺđńñņśş}. For some time SquidLib had a separate version of 1229 * imitation Hindi that was accurate to the IAST standard, but this version is more usable because font support is 1230 * much better for the glyphs it uses, so the IAST kind was removed (it added quite a bit of code for something that 1231 * was mostly unusable). 1232 * <br> 1233 * Darvāga yar; ghađhinopŕauka āĕrdur, conśaigaijo śabhodhaĕđū jiviđaudu. 
1234 */ 1235 public static final FakeLanguageGen HINDI_ROMANIZED = hindi().register("Hindi Romanized"); 1236 1237 private static FakeLanguageGen arabic(){ 1238 return new FakeLanguageGen( 1239 new String[]{"a", "a", "a", "a", "a", "a", "aa", "aa", "aa", "ai", "au", 1240 "a", "i", "u", "a", "i", "u", 1241 "i", "i", "i", "i", "i", "ii", "ii", "ii", 1242 "u", "u", "u", "uu", "uu", 1243 }, 1244 new String[]{}, 1245 new String[]{"gh", "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw", 1246 "zh", "khm", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1247 "q", "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r", 1248 "q", "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1249 "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1250 "al-", "al-", "ibn-", 1251 }, 1252 new String[]{ 1253 "kk", "kk", "kk", "kk", "kk", "dd", "dd", "dd", "dd", 1254 "nj", "mj", "bj", "mj", "bj", "mj", "bj", "dj", "dtj", "dhj", 1255 "nz", "nzh", "mz", "mzh", "rz", "rzh", "bz", "dz", "tz", 1256 "s-h", "sh-h", "shw-h", "tw", "bn", "fq", "hz", "hl", "khm", 1257 "lb", "lz", "lj", "lf", "ll", "lk", "lq", "lg", "ln" 1258 }, 1259 new String[]{ 1260 "gh", "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw", "dt", "jj", 1261 "zh", "khm", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1262 "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r", 1263 "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1264 "k", "f", "l", "z", "h", "h", "j", "s", "r", 1265 "b", "t", "th", "j", "kh", "khr", "d", "dh", "r", "z", "s", "sh", "shw", "dt", "jj", 1266 "zh", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1267 "k", "q", "k", "b", "d", "f", "l", "z", "zh", "h", "h", "kh", "j", "s", "sh", "shw", "r", 1268 "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1269 "k", "f", "l", "z", "h", "h", "j", "s", "r", 1270 }, 1271 new String[]{"aagh", "aagh", "ari", "ari", "aiid", "uuq", "ariid", "adih", "ateh", 
"adesh", "amiit", "it", 1272 "iit", "akhmen", "akhmed", "ani", "abiib", "iib", "uuni", "iiz", "aqarii", "adiiq", 1273 }, 1274 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 5, 5, 1}, 0.55, 0.65, 0.0, 0.15, arabicSanityChecks, true); 1275 } 1276 /** 1277 * Imitation Arabic, using mostly the Latin alphabet but with some Greek letters for tough transliteration topics. 1278 * It's hard to think of a more different (widely-spoken) language to romanize than Arabic. Written Arabic does not 1279 * ordinarily use vowels (the writing system is called an abjad, in contrast to an alphabet), and it has more than a 1280 * few sounds that are very different from those in English. This version, because of limited support in fonts and 1281 * the need for separate words to be distinguishable with regular expressions, uses somewhat-accurate digraphs or 1282 * trigraphs instead of the many accented glyphs (not necessarily supported by most fonts) in most romanizations of 1283 * Arabic, and this scheme uses no characters from outside ASCII. 1284 * <br> 1285 * Please try to be culturally-sensitive about how you use this generator. Classical Arabic (the variant that 1286 * normally marks vowels explicitly and is used to write the Qur'an) has deep religious significance in Islam, and 1287 * if you machine-generate text that (probably) isn't valid Arabic, but claim that it is real, or that it has 1288 * meaning when it actually doesn't, that would be an improper usage of what this generator is meant to do. In a 1289 * fantasy setting, you can easily confirm that the language is fictional and any overlap is coincidental; an 1290 * example of imitation Arabic in use is the Dungeons and Dragons setting, Al-Qadim, which according to one account 1291 * sounds similar to a word in real Arabic (that does not mean anything like what the designer was aiming for). 
In a 1292 * historical setting, FakeLanguageGen is probably "too fake" to make a viable imitation for any language, and may 1293 * just sound insulting if portrayed as realistic. You may want to mix ARABIC_ROMANIZED with a very different kind 1294 * of language, like GREEK_ROMANIZED or RUSSIAN_AUTHENTIC, to emphasize that this is not a real-world language. 1295 * <br> 1296 * Hiijakki al-aafusiib rihit, ibn-ullukh aj shwisari! 1297 */ 1298 public static final FakeLanguageGen ARABIC_ROMANIZED = arabic().register("Arabic Romanized"); 1299 /* 1300 public static final FakeLanguageGen ARABIC_ROMANIZED = new FakeLanguageGen( 1301 new String[]{"a", "a", "a", "a", "a", "a", "ā", "ā", "ā", "ai", "au", 1302 "a", "i", "u", "a", "i", "u", 1303 "i", "i", "i", "i", "i", "ī", "ī", "ī", 1304 "u", "u", "u", "ū", "ū", 1305 }, 1306 new String[]{}, 1307 new String[]{"δ", "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", "s", "š", "ş", "ď", "ť", 1308 "ż", "ξ", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1309 "q", "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r", 1310 "q", "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1311 "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1312 "al-", "al-", "ibn-", 1313 }, 1314 new String[]{ 1315 "kk", "kk", "kk", "kk", "kk", "dd", "dd", "dd", "dd", 1316 "nj", "mj", "bj", "mj", "bj", "mj", "bj", "dj", "ďj", "đj", 1317 "nz", "nż", "mz", "mż", "rz", "rż", "bz", "dz", "tz", 1318 "s-h", "š-h", "ş-h", "tw", "bn", "fq", "hz", "hl", "ĥm", 1319 "lb", "lz", "lj", "lf", "ll", "lk", "lq", "lg", "ln" 1320 }, 1321 new String[]{ 1322 "δ", "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", "s", "š", "ş", "ď", "ť", 1323 "ż", "ξ", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1324 "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r", 1325 "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1326 "k", "f", "l", "z", "h", "h", "j", "s", "r", 1327 "b", "t", "ţ", "j", "ĥ", "ħ", "d", "đ", "r", "z", 
"s", "š", "ş", "ď", "ť", 1328 "ż", "g", "f", "q", "k", "l", "m", "n", "h", "w", 1329 "k", "q", "k", "b", "d", "f", "l", "z", "ż", "h", "h", "ĥ", "j", "s", "š", "ş", "r", 1330 "k", "q", "k", "f", "l", "z", "h", "h", "j", "s", "r", 1331 "k", "f", "l", "z", "h", "h", "j", "s", "r", 1332 }, 1333 new String[]{"āδ", "āδ", "ari", "ari", "aīd", "ūq", "arīd", "adih", "ateh", "adeš", "amīt", "it", 1334 "īt", "aĥmen", "aĥmed", "ani", "abīb", "īb", "ūni", "īz", "aqarī", "adīq", 1335 }, 1336 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 5, 5, 1}, 0.55, 0.65, 0.0, 0.15, arabicSanityChecks, true); 1337 */ 1338 1339 private static FakeLanguageGen inuktitut(){ 1340 return new FakeLanguageGen( 1341 new String[]{"a", "a", "a", "a", "a", "aa", "aa", "aa", "aa", "i", "i", "i", "ii", "ii", "u", "u", "u", "uu", "uu", "ai", "ia", "iu", "ua", "ui"}, 1342 new String[]{}, 1343 new String[]{"p", "t", "k", "q", "s", "l", "h", "v", "j", "g", "r", "m", "n", 1344 "t", "t", "t", "t", "k", "k", "q", "q", "n", "n", "n", "n", "g", "l"}, 1345 new String[]{"pp", "tt", "kk", "pk", "tk", "gk", "kp", "kt", "kg", "pq", "tq", "gq", "ss", "ll", "rr", "mm", 1346 "nn", "nng", "ng", "ng", 1347 "ll", "nn", "nn", "nn",}, 1348 new String[]{"n", "t", "q", "k", "n", "t", "q", "k", "n", "t", "q", "k", "n", "t", "q", "k", "p", "s", "m", "g", "g", "ng", "ng", "ng"}, 1349 new String[]{"itut", "uit", "uq", "iuq", "iaq", "aq", "it", "aat", "aak", "aan", "ait", "ik", "uut", "un", "unnun", 1350 "ung", "ang", "ing", "iin", "iit", "iik", "in", 1351 "uq", "iaq", "aq", "ik", "it", "uit", "ut", "ut", "at", "un", "in" 1352 }, 1353 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 4, 6, 5, 4}, 0.45, 0.0, 0.0, 0.25, null, true); 1354 } 1355 /** 1356 * Imitation text from an approximation of one of the Inuktitut languages spoken by various people of the Arctic and 1357 * nearby areas. This is likely to be hard to pronounce. 
Inuktitut is the name accepted in Canada for one language 1358 * family of that area, but other parts of the Arctic circle speak languages with varying levels of difference from 1359 * this style of generated text. The term "Inuit language" may be acceptable, but "Eskimo language" is probably not, 1360 * and when that term is not considered outright offensive it refers to a different language group anyway (more 1361 * properly called Yupik or Yup'ik, and primarily spoken in Siberia instead of Canada and Alaska). 1362 * <br> 1363 * Ugkangungait ninaaq ipkutuilluuq um aitqiinnaitunniak tillingaat. 1364 */ 1365 public static final FakeLanguageGen INUKTITUT = inuktitut().register("Inuktitut"); 1366 1367 private static FakeLanguageGen norse(){ 1368 return new FakeLanguageGen( 1369 new String[]{"a","a","a","á","á","au","e","e","e","é","é","ei","ey","i","i","í","í","y","y","ý","ý", 1370 "o","o","o","ó","ó","u","u","u","ú","ú","æ","æ","æ","ö","ö",}, 1371 new String[]{}, 1372 new String[]{"b","bl","br","bj","d","dr","dj","ð","ðl","ðr","f","fl","flj","fr","fn","fj","g","gn","gj","h", 1373 "hj","hl","hr","hv","j","k","kl","kr","kn","kj","l","lj","m","mj","n","nj","p","pl","pr","pj","r", 1374 "rj","s","sj","sl","sn","sp","st","str","skr","skj","sþ","sð","t","tj","v","vl","vr","vj","þ","þl","þr", 1375 1376 "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð", 1377 "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð", 1378 "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð", 1379 "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð", 1380 "d","f","fl","g","gl","gr","k","h","hr","n","k","l","m","mj","n","r","s","st","t","þ","ð", 1381 1382 "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t", 1383 
"d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t", 1384 "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t", 1385 "d","d","f","f","fl","g","g","g","gl","gr","k","h","hr","n","k","kl","l","n","r","r","s","st","t","t", 1386 }, 1387 new String[]{"bd","bf","bg","bk","bl","bp","br","bt","bv","bm","bn","bð","bj", 1388 "db","df","dg","dk","dl","dp","dr","dt","dv","dm","dn","dð","dþ","dj","ndk","ndb","ndg","ndl","nds","nds", 1389 "ðl","ðr","ðk","ðj","ðg","ðd","ðb","ðp","ðs", 1390 "fb","fd","fg","fk","fl","fp","fr","fs","ft","fv","fm","fn","fð","fj", 1391 "gb","gd","gf","gk","gl","gp","gr","gt","gv","gm","gn","gð","gj", 1392 "h","hj","hl","hr","hv", 1393 "kb","kd","kf","kp","kv","km","kn","kð","kl","kr","nkj","nkr","nkl", 1394 "lbr","ldr","lfr","lg","lgr","lj","lkr","ln","ls","ltr","lv","lð","lðr","lþ", 1395 "mb","md","mk","mg","ml","mp","mr","ms","mt","mv","mð","mþ","mj", 1396 "nb","nl","np","nr","nv","nð","nþ","nj", 1397 "ngl","ngb","ngd","ngk","ngp","ngt","ngv","ngm","ngð","ngþ","ngr", 1398 "mbd","mbg","mbs","mbt","ldg","ldn","ldk","lds","rðn","rðl","gðs","gðr", 1399 "pb","pd","pg","pk","pl","pr","ps","psj","pð","pj", 1400 "rl","rbr","rdr","rg","rgr","rkr","rpr","rs","rts","rtr","rv","rj", 1401 "sb","sbr","sd","sdr","sf","sfj","sg","skr","skl","sm","sn","str","sv","sð","sþ","sj", 1402 "tr","tn","tb","td","tg","tv","tf","tj","tk","tm","tp",}, 1403 new String[]{"kk","ll","nn","pp","tt","kk","ll","nn","pp","tt", 1404 "bs","ds","gs","x","rn","gn","gt","gs","ks","kt","nt","nd","nk","nt","ng","ngs","ns", 1405 "ps","pk","pt","pts","lb","ld","lf","lk","lm","lp","lps","lt", 1406 "rn","rb","rd","rk","rp","rt","rm","rð","rþ","sk","sp","st","ts", 1407 "b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ", 1408 "b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ", 1409 
"b","d","ð","f","g","gn","h","k","nk","l","m","n","ng","p","r","s","sp","st","sþ","sð","t","v","þ", 1410 1411 "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t", 1412 "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t", 1413 "b","b","b","d","d","d","f","f","f","g","g","k","k","nk","l","n","ng","p","p","r","r","r","s","s","st","t","t", 1414 }, 1415 new String[]{"etta","eþa","uinn","ing","ard","eign","ef","efs","eg","ir","ir","ir","ir","ír","ír","ar","ar", 1416 "ar","ár","or","or","ór","ör","on","on","ón","onn","unn","ung","ut","ett","att","ot"}, 1417 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 5, 4, 3, 1}, 0.25, 0.5, 0.0, 0.08, genericSanityChecks, true); 1418 } 1419 /** 1420 * Somewhat close to Old Norse, which is itself very close to Icelandic, so this uses Icelandic spelling rules. Not 1421 * to be confused with the language(s) of Norway, where the Norwegian languages are called norsk, and are further 1422 * distinguished into Bokmål and Nynorsk. This should not be likely to seem like any form of Norwegian, since it 1423 * doesn't have the a-with-ring letter 'å' and has the letters eth ('Ðð') and thorn ('Þþ'). If you want to remove 1424 * any letters not present on a US-ASCII keyboard, you can use {@link Modifier#SIMPLIFY_NORSE} on this language or 1425 * some mix of this with other languages; it also changes some of the usage of "j" where it means the English "y" 1426 * sound, making "fjord" into "fyord", which is closer to familiar uses from East Asia like "Tokyo" and "Pyongyang". 1427 * You can also now use {@link #NORSE_SIMPLIFIED} directly, which is probably easiest. 1428 * <br> 1429 * Leyrk tjör stomri kna snó æd ðrépdápá, prygso? 
1430 */ 1431 public static final FakeLanguageGen NORSE = norse().register("Norse"); 1432 1433 private static FakeLanguageGen nahuatl(){ 1434 return new FakeLanguageGen( 1435 new String[]{"a", "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "i", "o", "o", "o", "e", "e", "eo", "oa", "ea"}, 1436 new String[]{}, 1437 new String[]{"ch", "c", "h", "m", "l", "n", "p", "t", "tl", "tz", "x", "y", "z", "hu", "cu", 1438 "l", "l", "l", "p", "p", "t", "t", "t", "t", "t", "tl", "tl", "tz", "z", "x", "hu"}, 1439 new String[]{"zp", "ztl", "zc", "zt", "zl", "ct", "cl", "pl", "mt", "mc", "mch", "cz", "tc", "lc", 1440 "hu", "hu", "hu", "cu"}, 1441 new String[]{ 1442 "ch", "c", "h", "m", "l", "n", "p", "t", "tl", "tz", "x", "y", "z", 1443 "l", "l", "l", "l", "p", "t", "t", "t", "tl", "tl", "tz", "tz", "z", "x" 1444 }, 1445 new String[]{"otl", "eotl", "ili", "itl", "atl", "atli", "oca", "itli", "oatl", "al", "ico", "acual", 1446 "ote", "ope", "oli", "ili", "acan", "ato", "atotl", "ache", "oc", "aloc", "ax", "itziz", "iz" 1447 }, 1448 new String[]{}, new int[]{1, 2, 3, 4, 5, 6}, new double[]{3, 4, 5, 4, 3, 1}, 0.3, 0.2, 0.0, 0.3, genericSanityChecks, true) 1449 .addModifiers(new Modifier("c([ie])", "qu$1"), 1450 new Modifier("z([ie])", "c$1")); 1451 } 1452 1453 /** 1454 * Imitation text from an approximation of the language spoken by the Aztec people and also over a million 1455 * contemporary people in parts of Mexico. This is may be hard to pronounce, since it uses "tl" as a normal 1456 * consonant (it can start or end words), but is mostly a fairly recognizable style of language. 1457 * <br> 1458 * Olcoletl latl palitz ach; xatatli tzotloca amtitl, xatloatzoatl tealitozaztitli otamtax? 
1459 */ 1460 public static final FakeLanguageGen NAHUATL = nahuatl().register("Nahuatl"); 1461 1462 private static FakeLanguageGen mongolian(){ 1463 return new FakeLanguageGen( 1464 new String[]{"a", "a", "a", "a", "a", "a", "a", "aa", "aa", "e", "i", "i", "i", "i", "i", "i", "i", "i", "ii", 1465 "o", "o", "o", "o", "oo", "u", "u", "u", "u", "u", "u", "u", "u", "uu", "uu", "ai", "ai"}, 1466 new String[]{}, 1467 new String[]{"g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "j", "j", "j", "j", 1468 "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "y", "y"}, 1469 new String[]{}, 1470 new String[]{"g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "gh", "j", "j", "j", 1471 "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "g", "n", 1472 "g", "m", "n", "g", "m", "n", "g", "m", "n", "n", "n", "ch", "gh", "ch", "gh", "gh", "gh", "j", "j", "j", "n", 1473 "s", "s", "s", "t", "ts", "kh", "r", "r", "l", "h", "h", "h", "h", "h", "b", "b", "b", "b", "z", "z", "y", "y", 1474 "ng", "ng", "ng", "ngh", "ngh", "lj", "gch", "sd", "rl", "bl", "sd", "st", "md", "mg", "gd", "gd", 1475 "sv", "rg", "rg", "mr", "tn", "tg", "ds", "dh", "dm", "gts", "rh", "lb", "gr", "gy", "rgh"}, 1476 new String[]{"ei", "ei", "ei", "uulj", "iig", "is", "is", "an", "aan", "iis", "alai", "ai", "aj", "ali" 1477 }, 1478 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 9, 3, 1}, 0.3, 0.2, 0.0, 0.07, null, true); 1479 } 1480 1481 /** 1482 * Imitation text from an approximation of one of the languages spoken in the 13th-century Mongol Empire. Can be 1483 * hard to pronounce. This is closest to Middle Mongolian, and is probably not the best way to approximate modern 1484 * Mongolian, which was written for many years in the Cyrillic alphabet (same alphabet as Russian) and has changed a 1485 * lot in other ways. 
     * <br>
     * Ghamgarg zilijuub lirgh arghar zunghichuh naboogh.
     */
    public static final FakeLanguageGen MONGOLIAN = mongolian().register("Mongolian");

    /**
     * A mix of four different languages, using only ASCII characters, that is meant for generating single words for
     * creature or place names in fantasy settings. The four languages are {@link #GREEK_ROMANIZED},
     * {@link #RUSSIAN_ROMANIZED}, {@link #FRENCH} with its accents removed, and {@link #JAPANESE_ROMANIZED}, combined
     * by nested {@code mix} calls from the innermost pair outward. The numeric arguments are mix weights; presumably
     * each one is the influence given to the language passed as the argument (confirm against {@code mix}).
     * This is registered under the name "Fantasy".
     * <br>
     * Adeni, Sainane, Caneros, Sune, Alade, Tidifi, Muni, Gito, Lixoi, Bovi...
     */
    public static final FakeLanguageGen FANTASY_NAME = GREEK_ROMANIZED.mix(
            RUSSIAN_ROMANIZED.mix(
                    FRENCH.removeAccents().mix(
                            JAPANESE_ROMANIZED, 0.5), 0.85), 0.925).register("Fantasy");
    /**
     * A mix of four different languages with some accented characters added onto an ASCII base, that can be good for
     * generating single words for creature or place names in fantasy settings that should have a "fancy" feeling from
     * having unnecessary accents added primarily for visual reasons.
     * <br>
     * Askieno, Blarcīnũn, Mēmida, Zizhounkô, Blęrinaf, Zemĭ, Mónazôr, Renerstă, Uskus, Toufounôr...
1507 */ 1508 public static final FakeLanguageGen FANCY_FANTASY_NAME = FANTASY_NAME.addAccents(0.47, 0.07).register("Fancy Fantasy"); 1509 1510 private static FakeLanguageGen goblin(){ 1511 return new FakeLanguageGen( 1512 new String[]{"a", "a", "a", "a", 1513 "e", "e", 1514 "i", "i", "i", 1515 "o", "o", "o", "o", 1516 "u", "u", "u", "u", "u", "u", "u", 1517 }, 1518 new String[]{}, 1519 new String[]{"b", "g", "d", "m", "h", "n", "r", "v", "sh", "p", "w", "y", "f", "br", "dr", "gr", "pr", "fr", 1520 "br", "dr", "gr", "pr", "fr", "bl", "dw", "gl", "gw", "pl", "fl", "hr", 1521 "b", "g", "d", "m", "h", "n", "r", "b", "g", "d", "m", "h", "n", "r", 1522 "b", "g", "d", "m", "r", "b", "g", "d", "r", 1523 }, 1524 new String[]{ 1525 "br", "gr", "dr", "pr", "fr", "rb", "rd", "rg", "rp", "rf", 1526 "br", "gr", "dr", "rb", "rd", "rg", 1527 "mb", "mg", "md", "mp", "mf", "bm", "gm", "dm", "pm", "fm", 1528 "mb", "mg", "md", "bm", "gm", "dm", 1529 "bl", "gl", "dw", "pl", "fl", "lb", "ld", "lg", "lp", "lf", 1530 "bl", "gl", "dw", "lb", "ld", "lg", 1531 "nb", "ng", "nd", "np", "nf", "bn", "gn", "dn", "pn", "fn", 1532 "nb", "ng", "nd", "bn", "gn", "dn", 1533 "my", "gy", "by", "py", "mw", "gw", "bw", "pw", 1534 "bg", "gb", "bd", "db", "bf", "fb", 1535 "gd", "dg", "gp", "pg", "gf", "fg", 1536 "dp", "pd", "df", "fd", 1537 "pf", "fp", 1538 "bg", "gb", "bd", "db", "gd", "dg", 1539 "bg", "gb", "bd", "db", "gd", "dg", 1540 "bg", "gb", "bd", "db", "gd", "dg", 1541 "bg", "gb", "bd", "db", "gd", "dg", 1542 "bg", "gb", "bd", "db", "gd", "dg", 1543 }, 1544 new String[]{ 1545 "b", "g", "d", "m", "n", "r", "sh", "p", "f", 1546 "b", "g", "d", "m", "n", "r", "b", "g", "d", "m", "n", "r", "sh", 1547 "b", "g", "d", "m", "r", "b", "g", "d", "r", 1548 "rb", "rd", "rg", "rp", "rf", "lb", "ld", "lg", "lp", "lf", 1549 }, 1550 new String[]{}, 1551 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{3, 7, 5, 1}, 0.1, 0.15, 0.0, 0.0, genericSanityChecks, true); 1552 } 1553 /** 1554 * Fantasy language that might 
be suitable for stealthy humanoids, such as goblins, or as a secret language used 1555 * by humans who want to avoid notice. Uses no "hard" sounds like "t" and "k", but also tries to avoid the flowing 1556 * aesthetic of fantasy languages associated with elves. Tends toward clusters of consonants like "bl", "gm", "dg", 1557 * and "rd". 1558 * <br> 1559 * Gwabdip dwupdagorg moglab yurufrub. 1560 */ 1561 public static final FakeLanguageGen GOBLIN = goblin().register("Goblin"); 1562 1563 private static FakeLanguageGen elf(){ 1564 return new FakeLanguageGen( 1565 new String[]{"a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 1566 "a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 1567 "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 1568 "ai", "ai", "ai", "ea", "ea", "ea", "ia", "ae" 1569 }, 1570 new String[]{ 1571 "ai", "ai", "ae", "ea", "ia", "ie", 1572 "â", "â", "ai", "âi", "aî", "aï", "î", "î", "ï", "ï", "îe", "iê", "ïe", "iê", 1573 "e", "ë", "ë", "ëa", "ê", "êa", "eâ", "ei", "eî", "o", "ô", 1574 "a", "a", "a", "e", "e", "e", "i", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 1575 "a", "a", "e", "e", "i", "o", "a", "a", "a", "e", "e", "e", "i", "i", "o", 1576 "ai", "ai", "ai", "ai", "ai", "ei", "ei", "ei", "ea", "ea", "ea", "ea", 1577 "ie", "ie", "ie", "ie", "ie", "ia", "ia", "ia", "ia" 1578 }, 1579 new String[]{"l", "r", "n", "m", "th", "v", "s", "sh", "z", "f", "p", "h", "y", "c", 1580 "l", "r", "n", "m", "th", "v", "f", "y", 1581 "l", "r", "n", "m", "th", "v", "f", 1582 "l", "r", "n", "th", "l", "r", "n", "th", 1583 "l", "r", "n", "l", "r", "n", "l", "r", "n", 1584 "pl", "fy", "ly", "cl", "fr", "pr", "qu", 1585 }, 1586 new String[]{"rm", "ln", "lv", "lth", "ml", "mv", "nv", "vr", "rv", "ny", "mn", "nm", "ns", "nth"}, 1587 new String[]{ 1588 "l", "r", "n", "m", "th", "s", 1589 "l", "r", "n", "th", "l", "r", "n", "th", 1590 "l", "r", "n", "l", "r", 
"n", "l", "r", "n", 1591 "r", "n", "r", "n", "r", "n", "n", "n", "n", "n" 1592 }, 1593 new String[]{}, 1594 new String[]{}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 6, 6, 3, 1}, 0.4, 0.3, 0.0, 0.0, genericSanityChecks, true); 1595 } 1596 1597 /** 1598 * Fantasy language that tries to imitate the various languages spoken by elves in J.R.R. Tolkien's works, using 1599 * accented vowels occasionally and aiming for long, flowing, vowel-heavy words. It's called ELF because there isn't 1600 * a consistent usage across fantasy and mythological sources of either "elvish", "elfish", "elven", "elfin", or any 1601 * one adjective for "relating to an elf." In the GDX display module, the "smooth" and "unicode" fonts, among 1602 * others, support all the accented characters you need for this. 1603 * <br> 1604 * Il ilthiê arel enya; meâlelail theasor arôreisa. 1605 */ 1606 public static final FakeLanguageGen ELF = elf().register("Elf"); 1607 1608 private static FakeLanguageGen demonic(){ 1609 return new FakeLanguageGen( 1610 new String[]{"a", "a", "a", "a", 1611 "e", 1612 "i", "i", 1613 "o", "o", "o", "o", "o", 1614 "u", "u", "u", "u", "u", 1615 }, 1616 new String[]{}, 1617 new String[]{ 1618 "b", "bh", "d", "dh", "t", "tl", "ts", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", "v", "y", 1619 "br", "bhr", "dr", "dhr", "tr", "tsr", "kr", "khr", "gr", "ghr", "fr", "shr", "vr", 1620 "bl", "bhl", "tsl", "kl", "chl", "khl", "gl", "ghl", "fl", "sl", "zl", "vl", 1621 "dz", "chf", "sf", "shf", "zv", "st", "sk", 1622 "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr", 1623 "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr", 1624 "x", "x", "x", "gh", "gh", "ghr", 1625 "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr", 1626 "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr", 1627 "x", "x", "x", "gh", 
"gh", "ghr", 1628 "t", "t", "t", "ts", "ts", "k", "k", "k", "kh", "kh", "kh", "kh", "khr", "kl", "kl", "kr", "kr", 1629 "z", "z", "z", "v", "v", "v", "zv", "zv", "vr", "vr", "vl", "vl", "dz", "sk", "sk", "sh", "shr", 1630 "x", "x", "x", "gh", "gh", "ghr", 1631 }, 1632 new String[]{}, 1633 new String[]{ 1634 "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", 1635 "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", 1636 "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", 1637 "b", "bh", "d", "dh", "t", "lt", "k", "ch", "kh", "g", "gh", "f", "x", "s", "sh", "z", "r", 1638 "rb", "rbs", "rbh", "rd", "rds", "rdh", "rt", "rts", "rk", "rks", "rch", "rkh", "rg", "rsh", "rv", "rz", 1639 "lt", "lts", "lk", "lch", "lkh", "lg", "ls", "lz", "lx", 1640 "bs", "ds", "ts", "lts", "ks", "khs", "gs", "fs", "rs", "rx", 1641 "bs", "ds", "ts", "lts", "ks", "khs", "gs", "fs", "rs", "rx", 1642 "rbs", "rds", "rts", "rks", "rkhs", "rgs", "rfs", "rs", "rx", 1643 "lbs", "lds", "lts", "lks", "lkhs", "lgs", "lfs", 1644 "rdz", "rvz", "gz", "rgz", "vd", "kt", 1645 "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk", 1646 "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh", 1647 "x", "x", "x", "gh", "gh", "gh", "rgh", 1648 "ts", "ts", "ks", "ks", "khs", 1649 "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk", 1650 "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh", 1651 "x", "x", "x", "gh", "gh", "gh", "rgh", 1652 "ts", "ts", "ks", "ks", "khs", 1653 "t", "t", "t", "rt", "lt", "k", "k", "k", "k", "k", "kh", "kh", "kh", "kh", "kh", "rkh", "lk", "rk", "rk", 1654 "z", "z", "z", "z", "v", "rv", "rv", "dz", "ks", "sk", "sh", 1655 "x", "x", "x", "gh", "gh", "gh", "rgh", 1656 "ts", "ts", "ks", "ks", "khs", 1657 }, 1658 new String[]{}, 1659 new String[]{"'"}, new 
int[]{1, 2, 3}, new double[]{6, 7, 3}, 0.05, 0.08, 0.11, 0.0, null, true); 1660 } 1661 /** 1662 * Fantasy language that might be suitable for a language spoken by demons, aggressive warriors, or people who seek 1663 * to emulate or worship similar groups. The tendency here is for DEMONIC to be the language used by creatures that 1664 * are considered evil because of their violence, while INFERNAL would be the language used by creatures that are 1665 * considered evil because of their manipulation and deceit (DEMONIC being "chaotic evil" and INFERNAL being "lawful 1666 * evil"). This uses lots of sounds that don't show up in natural languages very often, mixing harsh or guttural 1667 * sounds like "kh" and "ghr" with rare sounds like "vr", "zv", and "tl". It uses vowel-splitting in a way that is 1668 * similar to LOVECRAFT, sometimes producing sounds like "tsa'urz" or "khu'olk". 1669 * <br> 1670 * Vrirvoks xatughat ogz; olds xu'oz xorgogh! 1671 */ 1672 public static final FakeLanguageGen DEMONIC = demonic().register("Demonic"); 1673 1674 private static FakeLanguageGen infernal(){ 1675 return new FakeLanguageGen( 1676 new String[]{ 1677 "a", "a", "a", "à", "á", "â", "ä", 1678 "e", "e", "e", "e", "e", "e", "e", "e", "è", "é", "ê", "ë", 1679 "i", "i", "i", "i", "ì", "í", "î", "ï", 1680 "o", "o", "ò", "ó", "ô", "ö", 1681 "u", "u", "ù", "ú", "û", "ü", 1682 }, 1683 new String[]{"æ", "ai", "aî", "i", "i", "î", "ï", "ia", "iâ", "ie", "iê", "eu", "eû", "u", "u", "û", "ü"}, 1684 new String[]{"b", "br", "d", "dr", "h", "m", "z", "k", "l", "ph", "t", "n", "y", "th", "s", "sh", 1685 "m", "m", "m", "z", "z", "l", "l", "l", "k", "k", "b", "d", "h", "h", "y", "th", "th", "s", "sh", 1686 }, 1687 new String[]{ 1688 "mm", "mm", "mm", "lb", "dd", "dd", "dd", "ddr", "bb", "bb", "bb", "bbr", "lz", "sm", "zr", 1689 "thsh", "lkh", "shm", "mh", "mh", 1690 }, 1691 new String[]{ 1692 "b", "d", "h", "m", "n", "z", "k", "l", "ph", "t", "th", "s", "sh", "kh", 1693 "h", "m", "n", "z", "l", "ph", 
"t", "th", "s", 1694 "h", "h", "h", "m", "m", "n", "n", "n", "n", "n", "l", "l", "l", "l", "l", "t", "t", "t", 1695 "th", "th", "s", "s", "z", "z", "z", "z", 1696 }, 1697 new String[]{"ael", "im", "on", "oth", "et", "eus", "iel", "an", "is", "ub", "ez", "ath", "esh", "ekh", "uth", "ut"}, 1698 new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{3, 5, 9, 4}, 0.75, 0.35, 0.17, 0.07, genericSanityChecks, true); 1699 } 1700 /** 1701 * Fantasy language that might be suitable for a language spoken by fiends, users of witchcraft, or people who seek 1702 * to emulate or worship similar groups. The tendency here is for DEMONIC to be the language used by creatures that 1703 * are considered evil because of their violence, while INFERNAL is the language used by creatures that are 1704 * considered evil because of their manipulation and deceit (DEMONIC being "chaotic evil" and INFERNAL being "lawful 1705 * evil"). The name INFERNAL refers to Dante's Inferno and the various naming conventions used for residents of Hell 1706 * in the more-modern Christian traditions (as well as some of the stylistic conventions of Old Testament figures 1707 * described as false idols, such as Moloch and Mammon). In an effort to make this distinct from the general style 1708 * of names used in ancient Hebrew (since this is specifically meant for the names of villains as opposed to normal 1709 * humans), we add in vowel splits as used in LOVECRAFT and DEMONIC, then add quite a few accented vowels. These 1710 * traits make the language especially well-suited for "deal with the Devil" written bargains, where a single accent 1711 * placed incorrectly could change the meaning of a contract and provide a way for a fiend to gain leverage. 1712 * <br> 1713 * Zézîzûth eke'iez áhìphon; úhiah îbbëphéh haîtemheû esmez... 
1714 */ 1715 public static final FakeLanguageGen INFERNAL = infernal().register("Infernal"); 1716 1717 private static FakeLanguageGen simplish(){ 1718 return new FakeLanguageGen( 1719 new String[]{ 1720 "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u", 1721 "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u", 1722 "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u", 1723 "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u", 1724 "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u", 1725 "ai", "ai", "ea", "io", "oi", "ia", "io", "eo" 1726 }, 1727 new String[]{"u", "u", "oa"}, 1728 new String[]{ 1729 "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n", 1730 "p", "pl", "pr", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "v", "w", "y", "z", 1731 "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n", 1732 "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y", 1733 "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n", 1734 "p", "r", "s", "sh", "t", "th", 1735 "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n", 1736 "p", "r", "s", "sh", "t", "th", 1737 "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n", 1738 "p", "r", "s", "sh", "t", "th", 1739 "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n", 1740 "p", "r", "s", "sh", "t", "th", 1741 "b", "d", "f", "g", "h", "l", "m", "n", 1742 "p", "r", "s", "sh", "t", "th", 1743 "b", "d", "f", "g", "h", "l", "m", "n", 1744 "p", "r", "s", "sh", "t", "th", 1745 "r", "s", "t", "l", "n", 1746 }, 1747 new String[]{"ch", "j", "w", "y", "v", "w", "y", "w", "y", "ch", 1748 "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1749 }, 1750 new String[]{"bs", "lt", "mb", "ng", "ng", "nt", "ns", "ps", "mp", "rt", "rg", "sk", 
"rs", "ts", "lk", "ct", 1751 "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", "th", "z", 1752 "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1753 "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1754 "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1755 "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1756 "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1757 "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "sh", "t", 1758 }, 1759 new String[]{}, 1760 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{7, 18, 6, 1}, 0.26, 0.12, 0.0, 0.0, genericSanityChecks, true); 1761 } 1762 /** 1763 * English-like language that omits complex spelling and doesn't include any of the uncommon word endings of English 1764 * like "ought" or "ation." A good choice when you want something that doesn't use any non-US-keyboard letters, 1765 * looks somewhat similar to English, and tries to be pronounceable without too much effort. This doesn't have any 1766 * doubled or silent letters, nor does it require special rules for pronouncing vowels like "road" vs. "rod", though 1767 * someone could make up any rules they want. 1768 * <br> 1769 * Fledan pranam, simig bag chaimer, drefar, woshash is sasik. 
1770 */ 1771 public static final FakeLanguageGen SIMPLISH = simplish().register("Simplish"); 1772 1773 1774 private static FakeLanguageGen alien_a(){ 1775 return new FakeLanguageGen( 1776 new String[]{"a", "a", "a", "a", "a", "a", "a", "ai", "ai", "ao", "ao", "ae", "ae", "e", "e", "e", "e", 1777 "ea", "eo", "i", "i", "i", "i", "i", "i", "ia", "ie", "io", "o", "o", "o", "oa"}, 1778 new String[]{}, 1779 new String[]{"c", "f", "h", "j", "l", "m", "n", "p", "q", "r", "s", "v", "w", "x", "y", "z", 1780 "c", "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z", 1781 "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z", 1782 "hc", "hf", "hj", "hl", "hm", "hn", "hq", "hr", "hv", "hw", "hy", "hz", 1783 "cr", "fr", "jr", "mr", "nr", "pr", "qr", "sr", "vr", "xr", "yr", "zr", 1784 "cy", "fy", "jy", "my", "ny", "py", "qy", "ry", "sy", "vy", "xy", "zy", 1785 "cl", "fl", "jl", "ml", "nl", "pl", "ql", "sl", "vl", "xl", "yl", "zl", 1786 }, 1787 new String[]{ 1788 "cr", "fr", "jr", "mr", "nr", "pr", "qr", "sr", "vr", "xr", "yr", "zr", 1789 "cy", "fy", "jy", "my", "ny", "py", "qy", "ry", "sy", "vy", "xy", "zy", 1790 "cl", "fl", "jl", "ml", "nl", "pl", "ql", "sl", "vl", "xl", "yl", "zl", 1791 "jc", "lc", "mc", "nc", "qc", "rc", "sc", "wc", "yc", "zc", 1792 "cf", "jf", "lf", "nf", "qf", "rf", "sf", "vf", "wf", "yf", "zf", 1793 "cj", "fj", "lj", "mj", "nj", "qj", "rj", "sj", "wj", "yj", "zj", 1794 "cm", "fm", "jm", "lm", "nm", "qm", "rm", "sm", "vm", "wm", "ym", "zm", 1795 "cn", "fn", "jn", "ln", "mn", "qn", "rn", "sn", "vn", "wn", "yn", "zn", 1796 "cp", "fp", "jp", "lp", "mp", "np", "qp", "rp", "sp", "vp", "wp", "yp", "zp", 1797 "cq", "jq", "lq", "mq", "nq", "rq", "sq", "wq", "yq", "zq", 1798 "cs", "fs", "js", "ls", "ms", "ns", "qs", "vs", "ws", "ys", "zs", 1799 "cv", "fv", "jv", "lv", "mv", "nv", "qv", "rv", "sv", "wv", "yv", "zv", 1800 "cw", "fw", "jw", "lw", "mw", "nw", "qw", "rw", "sw", "vw", "yw", "zw", 1801 "cx", "jx", "lx", "mx", "nx", "qx", "rx", "vx", 
"wx", "yx", "zx", 1802 "cz", "fz", "lz", "mz", "nz", "qz", "rz", "sz", "vz", "wz", "yz", 1803 }, 1804 new String[]{ 1805 "c", "f", "h", "j", "l", "m", "n", "p", "q", "r", "s", "v", "w", "x", "y", "z", 1806 "c", "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z", 1807 "h", "j", "l", "m", "n", "q", "r", "s", "v", "w", "x", "y", "z", 1808 "hc", "hf", "hj", "hl", "hm", "hn", "hq", "hr", "hv", "hw", "hy", "hz", 1809 }, 1810 new String[]{}, 1811 new String[]{}, new int[]{1, 2, 3}, new double[]{1, 1, 1}, 0.65, 0.6, 0.0, 0.0, null, true); 1812 } 1813 1814 /** 1815 * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for 1816 * an alien species. This alien language emphasizes unusual consonant groups and prefers the vowels 'a' and 'i', 1817 * sometimes with two different vowels in one syllable, like with 'ea', but never two of the same vowel, like 'ee'. 1818 * Many consonant groups may border on unpronounceable unless a different sound is meant by some letters, such as 1819 * 'c', 'h', 'q', 'x', 'w', and 'y'. In particular, 'x' and 'q' may need to sound like different breathy, guttural, 1820 * or click noises for this to be pronounced by humans effectively. 1821 * <br> 1822 * Jlerno iypeyae; miojqaexli qraisojlea epefsaihj xlae... 
1823 */ 1824 public static final FakeLanguageGen ALIEN_A = alien_a().register("Alien A"); 1825 1826 private static FakeLanguageGen korean() 1827 { 1828 return new FakeLanguageGen( 1829 new String[]{ 1830 "a", "ae", "ya", "yae", "eo", "e", "yeo", "ye", "o", "wa", "wae", 1831 "oe", "yo", "u", "wo", "we", "wi", "yu", "eu", "i", "ui", 1832 "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "u", "u", "u", "u", 1833 "ae", "ya", "eo", "eo", "eu", "eu", "wa", "wae", "wo", "oe", "oe", 1834 "yo", "yo", "yu", "yu", "eu", 1835 }, 1836 new String[]{}, 1837 new String[]{ 1838 "g", "n", "d", "r", "m", "b", "s", "j", "ch", "k", "t", "p", "h", 1839 "g", "n", "d", "b", "p", "k", "j", "ch", "h", 1840 "g", "n", "d", "b", "p", "k", "j", "h", 1841 "g", "n", "p", "k", "j", 1842 "g", "p", "k", 1843 "g", "p", "k", 1844 }, 1845 new String[]{ 1846 "g", "kg", "ngn", "kd", "ngn", "ngm", "kb", "ks", "kj", "kch", "k-k", "kt", "kp", "k", 1847 "n", "n-g", "nn", "nd", "nn", "nm", "nb", "ns", "nj", "nch", "nk", "nt", "np", "nh", 1848 "d", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "t", 1849 "r", "lg", "nn", "ld", "ll", "lm", "lb", "ls", "lj", "lch", "lk", "lt", "lp", "lh", 1850 "m", "mg", "mn", "md", "mn", "mm", "mb", "ms", "mj", "mch", "mk", "mt", "mp", "mh", 1851 "b", "pg", "mn", "pd", "mn", "mm", "pb", "ps", "pj", "pch", "pk", "pt", "p-p", "p", 1852 "s", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "t", 1853 "ng-", "ngg", "ngn", "ngd", "ngn", "ngm", "ngb", "ngs", "ngj", "ngch", "ngk", "ngt", "ngp", "ngh", 1854 "j", "tg", "nn", "td", "nn", "nm", "tb", "ts", "tj", "tch", "tk", "t-t", "tp", "ch", 1855 "t", "t", "t", "j", "j", "j", "g", "g", "g", "g", "n", "n", "n", "n", "n", "ng", "ng", "ng", 1856 "d", "d", "d", "b", "b", 1857 "tt", "nn", "kk", "kk", "ks", 1858 "h", "k", "nn", "t", "nn", "nm", "p", "hs", "ch", "tch", "tk", "tt", "tp", "t", 1859 "kk", "pp", "ss", "tt", "jj", "ks", "nch", "nh", "r", 1860 "r", "r", "r", "r", "r", "r", "r", 
"r", "r", "r", "r", "r", 1861 "ngg", "ngn", "ngm", "ngj", "ngch", "ngk", "ngp", 1862 "mg", "mch", "mk", "md", "mb", "mp", 1863 "nj", "nch", "nd", "nk", "nb", "nj", "nch", "nd", "nk", 1864 "kg", "kj", "kch" 1865 }, 1866 new String[]{ 1867 "k", "n", "t", "l", "m", "p", "k", "ng", "h", "n", "n", 1868 "k", "n", "t", "l", "m", "p", "k", "ng", "h", "t", 1869 }, 1870 new String[]{"ul", "eul", "eol", "ol", "il", "yeol", "yol", "uk", "euk", "eok", "aek", "ok", "ak", 1871 "on", "ong", "eong", "yang", "yong", "yeong", "ung", "wong", "om", "am", "im", "yuh", "uh", "euh", 1872 "ap", "yaep", "eop", "wep", "yeop" 1873 }, 1874 new String[]{"-"}, 1875 new int[]{1, 2, 3, 4}, new double[]{14, 9, 3, 1}, 0.14, 0.24, 0.02, 0.09, 1876 null, true); 1877 } 1878 /** 1879 * Imitation text from an approximation of Korean, using the Revised Romanization method that is official in South 1880 * Korea today and is easier to type. The text this makes may be hard to pronounce. Korean is interesting as a 1881 * language to imitate for a number of reasons; many of the sounds in it are rarely found elsewhere, it can cluster 1882 * consonants rather tightly (most languages don't; English does to a similar degree but Japanese hardly has any 1883 * groups of consonants), and there are many more vowel sounds without using tones (here, two or three letters are 1884 * used for a vowel, where the first can be y or w and the rest can be a, e, i, o, or u in some combination). Some 1885 * letter combinations possible here are impossible or very rare in correctly-Romanized actual Korean, such as the 1886 * rare occurrence of a single 'l' before a vowel (it normally only appears in Romanized text before a consonant or 1887 * at the end of a word). 1888 * <br> 1889 * Hyeop euryam, sonyon muk tyeok aengyankeon, koelgwaelmwak. 
1890 */ 1891 public static final FakeLanguageGen KOREAN_ROMANIZED = korean().register("Korean Romanized"); 1892 1893 private static FakeLanguageGen alien_e(){ 1894 return new FakeLanguageGen( 1895 new String[]{"a", "a", "a", "a", "a", "a", "aa", "aa", 1896 "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "ee", "ee", "ee", "ee", 1897 "i", "i", "i", "i", "i", "ii", 1898 "o", "o", "o", "o", 1899 "u", "u", "u" 1900 }, 1901 new String[]{}, 1902 new String[]{"t", "k", "c", "g", "z", "s", "d", "r", "ts", 1903 "tr", "kr", "cr", "gr", "zr", "st", "sk", "dr", 1904 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1905 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1906 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1907 "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts", 1908 "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts", 1909 "t", "k", "c", "g", "r", "ts", "t", "k", "c", "g", "r", "ts", 1910 "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", 1911 "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", "t", "k", "ts", 1912 "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", 1913 "tr", "kr", "st", "sk", "tq", "kq", "sq" 1914 }, 1915 new String[]{ 1916 "tt", "kk", "cc", "gg", "zz", "dd", "s", "r", "ts", 1917 "tr", "kr", "cr", "gr", "zr", "st", "sk", "dr", 1918 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1919 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1920 "tq", "kq", "cq", "gq", "zq", "sq", "dq", 1921 "tk", "kt", "tc", "ct", "gt", "tg", "zt", "tz", "td", "dt", "rt", "rtr", "tst", 1922 "kc", "ck", "gk", "kg", "zk", "kz", "kd", "dk", "rk", "rkr", "tsk", "kts", 1923 "gc", "cg", "zc", "cz", "cd", "dc", "rc", "rcr", "tsc", "cts", 1924 "zg", "gz", "gd", "dg", "rg", "rgr", "tsg", "gts", 1925 "zd", "dz", "rz", "rzr", "tsz", "zts", 1926 "rd", "rdr", "tsd", "dts", 1927 "tt", "tt", "tt", "tt", "tt", "tt", 1928 "tt", "tt", "tt", "tt", "tt", "tt", 1929 "kk", "kk", "kk", "kk", "kk", "kk", 1930 "kk", "kk", "kk", "kk", "kk", "kk", 1931 "kt", "tk", "kt", "tk", "kt", "tk", 
"kt", "tk", 1932 }, 1933 new String[]{ 1934 "t", "k", "c", "g", "z", "s", "d", "r", "ts", 1935 "t", "k", "t", "k", "t", "k", "ts", 1936 "t", "k", "c", "g", "z", "s", "d", "r", "ts", 1937 "t", "k", "t", "k", "t", "k", "ts", 1938 "st", "sk", "sc", "sg", "sz", "ds", 1939 "rt", "rk", "rc", "rg", "rz", "rd", "rts" 1940 }, 1941 new String[]{}, 1942 new String[]{}, new int[]{1, 2, 3}, new double[]{5, 4, 2}, 0.45, 0.0, 0.0, 0.0, null, true); 1943 } 1944 1945 /** 1946 * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for 1947 * an alien species. This alien language emphasizes hard sounds and prefers the vowels 'e' and 'a', sometimes with 1948 * two of the same vowel, like 'ee', but never with two different vowels in one syllable, like with 'ea'. 1949 * This language is meant to use click sounds, if pronunciation is given, where 'q' modifies a consonant to form a 1950 * click, such as 'tq'. This is like how 'h' modifies letters in English to make 'th' different from 't' or 'h'. 1951 * This may be ideal for a species with a beak (or one that lacks lips for some other reason), since it avoids using 1952 * sounds that require lips (some clicks might be approximated by other species using their lips if this uses some 1953 * alien-specific clicking organ). 1954 * <br> 1955 * Reds zasg izqekkek zagtsarg ukaard ac ots as! 
1956 */ 1957 public static final FakeLanguageGen ALIEN_E = alien_e().register("Alien E"); 1958 1959 private static FakeLanguageGen alien_i(){ 1960 return new FakeLanguageGen( 1961 new String[]{ 1962 "a", "a", "a", "a", "a", "a", "à", "á", "â", "ā", "ä", 1963 "e", "e", "e", "e", "e", "e", "è", "é", "ê", "ē", "ë", 1964 "i", "i", "i", "i", "i", "i", "i", "i", "ì", "í", "î", "ï", "ī", 1965 "i", "i", "i", "i", "i", "i", "i", "i", "ì", "í", "î", "ï", "ī", 1966 "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "o", "ò", "ó", "ô", "ō", "ö", 1967 "u", "u", "u", "u", "u", "u", "ù", "ú", "û", "ū", "ü", 1968 }, 1969 new String[]{}, 1970 new String[]{ 1971 "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l", 1972 "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l", 1973 "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "j", "v", "h", "r", "l", 1974 "r", "r", "r", "r", "r", "l", "l", "l", "l", "l", 1975 "gr", "gl", "zr", "zl", "sl", "shr", "thr", "mr", "nr", "pr", "pl", "br", "bl", "vr", "vl", "hr", 1976 "zv", "sp", "zg" 1977 }, 1978 new String[]{ 1979 "j", "h", 1980 }, 1981 new String[]{ 1982 "r", "l", "ch", "g", "z", "zh", "s", "sh", "th", "m", "n", "p", "b", "v", "r", "l", 1983 "th", "zh", "sh", "th", "zh", "sh", "lth", "lzh", "lsh", "rth", "rzh", "rsh", 1984 }, 1985 new String[]{}, 1986 new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{6, 9, 5, 1}, 0.6, 0.4, 0.075, 0.0, null, true); 1987 } 1988 1989 /** 1990 * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for 1991 * an alien species. This alien language emphasizes "liquid" sounds such as 'l', 'r', and mixes with those and other 1992 * consonants, and prefers the vowels 'i' and 'o', never with two of the same vowel, like 'ee', nor with two 1993 * different vowels in one syllable, like with 'ea'; it uses accent marks heavily and could be a tonal language. 
1994 * It sometimes splits vowels with a single apostrophe, and rarely has large consonant clusters. 1995 * <br> 1996 * Asherzhäl zlómór ìsiv ázá nralthóshos, zlôbùsh. 1997 */ 1998 public static final FakeLanguageGen ALIEN_I = alien_i().register("Alien I"); 1999 2000 private static FakeLanguageGen alien_o(){ 2001 return new FakeLanguageGen( 2002 new String[]{ 2003 "a", "e", "i", "o", "o", "o", "o", "u", 2004 "aa", "ea", "ia", "oa", "oa", "oa", "ua", "ae", "ai", "ao", "ao", "ao", "au", 2005 "ee", "ie", "oe", "oe", "oe", "ue", "ei", "eo", "eo", "eo", "eu", 2006 "ii", "oi", "oi", "oi", "ui", "io", "io", "io", "iu", 2007 "oo", "ou", "uo", "oo", "ou", "uo", "oo", "ou", "uo", "uu", 2008 "aa", "ea", "ia", "oa", "oa", "oa", "ua", "ae", "ai", "ao", "ao", "ao", "au", 2009 "ee", "ie", "oe", "oe", "oe", "ue", "ei", "eo", "eo", "eo", "eu", 2010 "ii", "oi", "ui", "io", "io", "io", "iu", 2011 "oo", "ou", "uo", "oo", "ou", "uo", "oo", "ou", "uo", "uu", 2012 "aea", "aia", "aoa", "aoa", "aoa", "aua", "eae", "eie", "eoe", "eoe", "eoe", "eue", 2013 "iai", "iei", "ioi", "ioi", "ioi", "iui", "uau", "ueu", "uiu", "uou", 2014 "oao", "oeo", "oio", "ouo", "oao", "oeo", "oio", "ouo", "oao", "oeo", "oio", "ouo", 2015 "aei", "aeo", "aeo", "aeo", "aeu", "aie", "aio", "aio", "aio", "aiu", 2016 "aoe", "aoi", "aou", "aoe", "aoi", "aou", "aoe", "aoi", "aou", "aue", "aui", "auo", "auo", "auo", 2017 "eai", "eao", "eao", "eao", "eau", "eia", "eio", "eio", "eio", "eiu", 2018 "eoa", "eoi", "eou", "eoa", "eoi", "eou", "eoa", "eoi", "eou", "eua", "eui", "euo", "euo", "euo", 2019 "iae", "iao", "iao", "iao", "iau", "iea", "ieo", "ieo", "ieo", "ieu", 2020 "ioa", "ioe", "iou", "ioa", "ioe", "iou", "ioa", "ioe", "iou", "iua", "iue", "iuo", "iuo", "iuo", 2021 "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", "oui", 2022 "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", "oui", 2023 "oae", "oai", "oau", "oea", "oei", "oeu", "oia", "oie", "oiu", "oua", "oue", 
"oui", 2024 "uae", "uai", "uao", "uao", "uao", "uea", "uei", "ueo", "ueo", "ueo", "uia", "uie", 2025 "uio", "uoa", "uoe", "uoi", "uio", "uoa", "uoe", "uoi", "uio", "uoa", "uoe", "uoi", 2026 }, 2027 new String[]{}, 2028 new String[]{ 2029 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2030 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2031 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2032 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2033 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2034 "hm", "hn", "hr", "hw", "hv", "hl", "hy", 2035 "fm", "fn", "fr", "fw", "fv", "fl", "fy", 2036 "mr", "vr", "ry" 2037 }, 2038 new String[]{ 2039 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2040 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2041 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2042 "m", "n", "r", "w", "h", "v", "f", "l", "y", 2043 "mm", "nn", "rr", "ww", "hh", "vv", "ff", "ll", "yy", 2044 "mm", "nn", "rr", "ww", "hh", "vv", "ff", "ll", "yy", 2045 "hm", "hn", "hr", "hw", "hv", "hl", "hy", 2046 "fm", "fn", "fr", "fw", "fv", "fl", "fy", 2047 "mr", "vr", "ry" 2048 }, 2049 new String[]{ 2050 "m", "n", "r", "h", "v", "f", "l", 2051 "m", "n", "r", "h", "v", "f", "l", 2052 "m", "n", "r", "h", "v", "f", "l", 2053 "rm", "rn", "rv", "rf", "rl", 2054 "lm", "ln", "lv", "lf" 2055 }, 2056 new String[]{}, 2057 new String[]{}, new int[]{1, 2, 3}, new double[]{3, 6, 4}, 0.0, 0.55, 0.0, 0.0, null, true); 2058 } 2059 2060 /** 2061 * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for 2062 * an alien species. This alien language emphasizes large clusters of vowels, typically with 2 or 3 vowel sounds 2063 * between consonants, though some vowel groups could be interpreted in multiple ways (such as English "maim" and 2064 * "bail", which also have regional differences in pronunciation). 
As the name would suggest, it strongly prefers 2065 * using the vowel "o", with it present in about half the groups, but doesn't have any preference toward or against 2066 * the other vowels it uses, "a", "e", "i", and "u". The consonants completely avoid hard sounds like "t" and "k", 2067 * medium-hard sounds like "g" and "b", and also sibilants like "s" and "z". This should be fairly hard to 2068 * pronounce, but possible. 2069 * <br> 2070 * Foiuhoeorfeaorm novruol naionouffeu meuif; hmoieloreo naemriou. 2071 */ 2072 public static final FakeLanguageGen ALIEN_O = alien_o().register("Alien O"); 2073 2074 // àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳ 2075 // çðþñýćĉċčďđĝğġģĥħĵķĺļľŀłńņňŋŕŗřśŝşšţťŵŷÿźżžșțẁẃẅ 2076 private static FakeLanguageGen alien_u(){ 2077 return new FakeLanguageGen( 2078 new String[]{ 2079 "a", "a", "a", "a", "ä", "i", "o", "o", "o", "ö", "u", "u", "u", "u", "u", "u", "ü", "ü" 2080 }, 2081 new String[]{}, 2082 new String[]{ 2083 "b", "b", "b", "b", "d", "d", "g", "g", "ġ", "h", "h", "h", "h", "ħ", 2084 "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p", 2085 "q", "q", "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v", 2086 "w", "w", "w", "w", "ẃ", "y", "y", "y", "y", "ý" 2087 }, 2088 new String[]{ 2089 "b", "b", "b", "b", "d", "d", "g", "g", "ġ", "h", "h", "h", "h", "ħ", 2090 "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p", 2091 "q", "q", "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v", 2092 "w", "w", "w", "w", "ẃ", "y", "y", "y", "y", "ý" 2093 }, 2094 new String[]{ 2095 "b", "b", "b", "b", "d", "d", "g", "g", "ġ", 2096 "l", "l", "l", "l", "ł", "m", "m", "m", "m", "m", "n", "n", "n", "n", "ñ", "ŋ", "p", "p", "p", 2097 "r", "r", "r", "ŕ", "s", "s", "s", "s", "ś", "v", "v", "v", "v", 2098 }, 2099 new String[]{"emb", "embrid", "embraŋ", "eŋ", "eŋul", "eŋov", "eẃul", "eẃuld", "eẃulb", 2100 "eviś", "evim", "ełurn", 
"ełav", "egiġ", "ergiġ", "elgiġ", "eŕu", "eŕup", "eŕulm", "eŕuv", 2101 "eħul", "eħid", "eħiŋ", "eyü", "eyür", "eyürl", "eyüld", "eyüns", "eqä", "eqäp", "eqäġ", 2102 "esu", "esumb", "esulg", "esurl", "eśo", "eśold", "eśolg", "eśu", "eśur", "eśuŋ", 2103 "eñu", "eñuns", "eñurn", "eño", "eñolb", "eñols" 2104 }, 2105 new String[]{"'"}, new int[]{1, 2, 3, 4, 5}, new double[]{3, 4, 7, 5, 2}, 0.4, 0.15, 0.06, 0.5, null, true); 2106 } 2107 2108 /** 2109 * Fantasy/sci-fi language that could be spoken by some very-non-human culture that would typically be fitting for 2110 * an alien species. This alien language is meant to have an abrupt change mid-word for many words, with the suffix 2111 * of roughly half of words using the letter "e", which is absent from the rest of the language; these suffixes can 2112 * also use consonant clusters, which are similarly absent elsewhere. The suffixes would make sense as a historical 2113 * relic or as a linguistic holdout from a historical merger. As the name would suggest, it strongly prefers 2114 * using the vowel "u", with it present in about half the groups, and can use the umlaut accent "ü" on some vowels. 2115 * The consonants completely avoid hard sounds like "t" and "k", and don't cluster; they often have special marks. 2116 * This should be relatively easy to pronounce for an alien language, though the words are rather long. 2117 * <br> 2118 * Üweħid vuŕeħid deẃul leŋul waloyeyür; äyovavü... 
2119 */ 2120 public static final FakeLanguageGen ALIEN_U = alien_u().register("Alien U"); 2121 2122 private static FakeLanguageGen dragon(){ 2123 return new FakeLanguageGen( 2124 new String[]{ 2125 "a", "a", "a", "e", "e", "i", "i", "o", "o", "u", 2126 "a", "a", "a", "e", "e", "i", "i", "o", "o", "u", 2127 "a", "a", "a", "e", "e", "i", "i", "o", "o", "u", 2128 "a", "a", "a", "e", "e", "i", "i", "o", "o", "u", 2129 "a", "a", "a", "a", "a", "a", "e", "i", "o", 2130 "ai", "ai", "aa", "ae", "au", "ea", "ea", "ea", 2131 "ia", "ia", "ie", "io", "io", "oa", "ou" 2132 }, 2133 new String[]{ 2134 "aa", "aa", "aa", "ai", "ae", "ae", "ae", "au", "au", 2135 "ea", "ea", "eo", "eo", 2136 "ii", "ii", "ia", "ia", "ia", "ia", "ie", "ie", "ie", "io", "io", "io", 2137 "oa", "ou", "ou", "ou", "ou" 2138 }, 2139 new String[]{ 2140 "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z", 2141 "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z", 2142 "d", "f", "g", "h", "k", "l", "m", "n", "r", "t", "th", "v", "z", 2143 "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z", 2144 "d", "f", "g", "h", "l", "k", "l", "n", "r", "t", "th", "v", "z", 2145 "d", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z", 2146 "d", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z", 2147 "d", "g", "k", "l", "r", "t", 2148 "d", "g", "k", "l", "r", "t", 2149 "d", "g", "k", "l", "r", "t", 2150 "k", "k", "t", "t", "v", 2151 "k", "k", "t", "t", "th", 2152 "k", "k", "t", "t", "ch", 2153 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2154 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2155 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2156 "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr", 2157 "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr", 2158 }, 2159 new String[]{ 2160 "rch", "rd", "rg", "rk", "rm", "rn", "rp", "rt", "rth", "rv", "rw", "rz", 2161 "rch", "rd", "rg", "rk", "rm", "rn", "rp", "rt", "rth", "rv", "rw", "rz", 2162 "rdr", 
"rgr", "rkr", "rtr", "rthr", 2163 "lk", "lt", "lv", "lz", 2164 "ng", "nk", "ng", "nk", "ng", "nk", "ng", "nk", "nt", "nth", "nt", "nth", "nt", "nth", "nd", 2165 "ngr", "nkr", "ntr", "nthr", 2166 "dh", "gh", "lh", "mh", "nh", "rh", 2167 "dch", "dg", "dk", "dth", "dv", "dz", 2168 "kch", "kg", "kd", "kth", "kv", "kz", 2169 "gch", "gd", "gk", "gth", "gv", "gz", 2170 "tch", "tg", "tk", "ty", "tv", "tz", 2171 "zm", "zn", "zk", "zv", "zt", "zg", "zd", 2172 2173 "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z", 2174 "ch", "d", "f", "g", "h", "k", "l", "m", "n", "p", "r", "t", "th", "v", "w", "y", "z", 2175 "d", "f", "g", "h", "k", "l", "m", "n", "r", "t", "th", "v", "z", 2176 "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v", "z", 2177 "d", "f", "g", "h", "k", "l", "n", "r", "t", "th", "v", 2178 "d", "g", "k", "l", "n", "r", "t", "th", "v", 2179 "d", "g", "k", "l", "n", "r", "t", "th", "v", 2180 "d", "g", "k", "l", "r", "t", 2181 "d", "g", "k", "l", "r", "t", 2182 "d", "g", "k", "l", "r", "t", 2183 "k", "k", "t", "t", "r", 2184 "k", "k", "t", "t", "r", 2185 "k", "k", "t", "t", "r", 2186 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2187 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2188 "dr", "fr", "gr", "hr", "kr", "tr", "thr", 2189 "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr", 2190 "dr", "gr", "hr", "kr", "tr", "thr", "dr", "gr", "kr", "tr", 2191 2192 }, 2193 new String[]{ 2194 "z", "z", "z", "t", "t", "t", "n", "r", "k", "th" 2195 }, 2196 new String[]{"iamat", "at", "ut", "ok", "iok", "ioz", "ez", "ion", "ioth", "aaz", "iel"}, 2197 new String[]{}, new int[]{2, 3, 4, 5}, new double[]{2, 7, 10, 3}, 0.14, 0.04, 0.0, 0.11, genericSanityChecks, true); 2198 } 2199 2200 /** 2201 * Fantasy language that tries to sound like the speech of a powerful and pompous dragon, using long, complex words 2202 * and a mix of hard consonants like "t" and "k", "liquid" consonants like "l" and "r", and sometimes vowel groups 2203 * like "ie" 
and "aa". It frequently uses consonant clusters involving "r". It uses no accented characters. 2204 * <br> 2205 * Vokegodzaaz kigrofreth ariatarkioth etrokagik deantoznik hragriemitaaz gianehaadaz... 2206 */ 2207 public static final FakeLanguageGen DRAGON = dragon().register("Dragon"); 2208 2209 /** 2210 * Fantasy language based closely on {@link #DRAGON}, but with much shorter words normally and closing syllables 2211 * that may sound "rushed" or "crude", though it has the same general frequency of most consonants and vowels. 2212 * This means it still uses lots of "t", "k", and "r", can group two vowels sometimes, and when there's a consonant 2213 * in the middle of a word, it is often accompanied by an "r" on one or both sides. If used with 2214 * {@link NaturalLanguageCipher}, this will look very similar to DRAGON, because the syllable lengths aren't 2215 * determined by this object but by the text being ciphered. Still, the ends of words are often different. It is 2216 * called KOBOLD because, even though the original kobold myth was that of a goblin-like spirit that haunted cobalt 2217 * mines, the modern RPG treatment of kobolds frequently describes them as worshippers of dragons or in some way 2218 * created by dragons, but generally they're a sort of failure to live up to a dragon's high expectations. The feel 2219 * of this language is meant to be something like a dragon's speech, but much less "fancy" and rather curt. 2220 * <br> 2221 * Thritriz, laazak gruz kokak thon lut... 
2222 */ 2223 public static final FakeLanguageGen KOBOLD = new FakeLanguageGen( 2224 DRAGON.openingVowels, DRAGON.midVowels, DRAGON.openingConsonants, DRAGON.midConsonants, DRAGON.closingConsonants, 2225 new String[]{"ik", "ak", "ek", "at", "it", "ik", "ak", "ek", "at", "it", "ik", "ak", "ek", "at", "it", "et", "ut", "ark", "irk", "erk"}, 2226 DRAGON.vowelSplitters, new int[]{1, 2, 3}, new double[]{5, 11, 1}, 2227 0.1, 0.0, 0.0, 0.22, genericSanityChecks, true).register("Kobold"); 2228 2229 private static FakeLanguageGen insect(){ 2230 return new FakeLanguageGen( 2231 new String[]{ 2232 "a", "a", "a", "a", "a", "a", 2233 "e", "e", "e", "e", 2234 "i", "i", "i", "i", "i", "i", "i", 2235 "o", "o", "o", 2236 "u", "u", 2237 }, 2238 new String[]{}, 2239 new String[]{"t", "k", "g", "sh", "s", "x", "r", "ts", 2240 "tr", "kr", "gr", "shr", "st", "sk", 2241 "tr", "kr", "st", "sk", "tr", "kr", "st", "sk", 2242 "t", "k", "g", "sh", "s", "x", "r", "ts", 2243 "t", "k", "r", "ts", "ts", 2244 "t", "k", "r", "tr", "kr", "t", "k", "r", "tr", "kr", "t", "k", "r", "tr", "kr", 2245 "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", "t", "k", 2246 }, 2247 new String[]{ 2248 "rr","rr","rr","rr","rr","rr","rr","rr","rr","rr", 2249 "rt", "rk", "rg", "rsh", "rs", "rx", "rts", 2250 "xt", "xk", "xg", "xr", 2251 "sts", "skr", "str", "sks" 2252 }, 2253 new String[]{ 2254 "t", "k", "g", "sh", "s", "x", "r", "ts", "t", "k", "g", "sh", "s", "x", "r", "ts", 2255 "rt", "rk", "rg", "rsh", "rs", "rx", "rts", 2256 "t", "t", "t", "t", "t", "t", "k", "k", "k", "k", "k", "k", "x", "x", "rr", "rr", "rr" 2257 }, 2258 new String[]{}, 2259 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{6, 4, 2, 1}, 0.3, 0.1, 0.0, 0.0, null, true); 2260 } 2261 2262 /** 2263 * Fantasy/sci-fi language that would typically be fitting for an insect-like species without a close equivalent to 2264 * human lips. 
This language emphasizes hard sounds such as 't' and 'k', uses some sibilants such as 's', 'sh', and 2265 * 'x', uses lots of 'r' sounds, includes trill sounds using 'rr' (as in Spanish), and uses primarily 'a' and 'i' 2266 * for vowels, with low complexity on vowels. Differs from {@link #ALIEN_E} by not having harder-to-explain click 2267 * sounds, and adjusting vowels/sibilants a fair bit. 2268 * <br> 2269 * Ritars tsarraxgits, krit trir istsak! 2270 */ 2271 public static final FakeLanguageGen INSECT = insect().register("Insect"); 2272 2273 private static FakeLanguageGen maori(){ 2274 return new FakeLanguageGen( 2275 new String[]{"a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2276 "o", "o", "o", "o", "o", "u", "u", 2277 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2278 "o", "o", "o", "o", "o", "u", "u", 2279 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2280 "o", "o", "o", "o", "o", "u", "u", 2281 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2282 "o", "o", "o", "o", "o", "u", "u", 2283 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2284 "o", "o", "o", "o", "o", "u", "u", 2285 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2286 "o", "o", "o", "o", "o", "u", "u", 2287 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2288 "o", "o", "o", "o", "o", "u", "u", 2289 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2290 "o", "o", "o", "o", "o", "u", "u", 2291 "a", "a", "a", "a", "a", "a", "ā", "ā", "e", "e", "e", "i", "i", "i", "i", 2292 "o", "o", "o", "o", "o", "u", "u", 2293 "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au", 2294 "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au", 2295 "āe", "āi", "āi", "āi", "āo", "āo", "āo", "āo", "āu", "oi", "oe", "ou", 2296 "ae", "ai", "ai", "ai", "ao", "ao", "ao", "ao", "au", 2297 "ae", "ai", "ai", "ai", 
"ao", "ao", "ao", "ao", "au", 2298 "āe", "āi", "āi", "āi", "āo", "āo", "āo", "āo", "āu", "oi", "oe", "ou", 2299 "āa", "āoi", "āoe", "āou", 2300 "āa", "āoi", "āoe", "āou", 2301 "ea", "ei", "ei", "ei", "eo", "eo", "eo", "eu", "eae", "eai", "eao", "eā", "eāe", "eāi", "eāo", "eoi", "eoe", "eou", 2302 "ia", "ia", "ie", "io", "io", "iu", "iae", "iai", "iao", "iau", "iā", "iāe", "iāi", "iāo", "iāu", "ioi", "ioe", "iou", 2303 "oa", "oa", "oa", "oa", "oae", "oai", "oao", "oau", "oā", "oā", "oāe", "oāi", "oāo", "oāu", 2304 "oa", "oa", "oa", "oa", "oae", "oai", "oao", "oau", "oā", "oā", "oāe", "oāi", "oāo", "oāu", 2305 "ua", "ue", "ui", "uo", "uae", "uai", "uao", "uau", "uā", "uāe", "uāi", "uāo", "uāu", "uoi", "uoe", "uou", 2306 "aea", "aea", "aei", "aei", "aei", "aeo", "aeo", "aeo", "aeu", 2307 "aia", "aia", "aia", "aia", "aie", "aio", "aio", "aiu", 2308 "aoa", "aoa", 2309 "aua", "aua", "aue", "aue", "aue", "aui", "aui", "auo", 2310 "āea", "āea", "āei", "āei", "āei", "āeo", "āeo", "āeo", "āeu", 2311 "āia", "āia", "āia", "āia", "āie", "āio", "āio", "āiu", 2312 "āoa", "āoa", 2313 "āua", "āua", "āue", "āue", "āue", "āui", "āui", "āuo", 2314 }, 2315 new String[]{}, 2316 new String[]{"h", "h", "k", "k", "m", "m", "m", "m", "n", "n", "p", "p", 2317 "r", "r", "r", "r", "r", "t", "t", "t", "t", "w", "w", "ng", "wh", "wh", "wh", 2318 "h", "k", "m", "m", "m", "m", "n", "n", "p", "p", 2319 "r", "r", "r", "r", "r", "t", "t", "t", "t", "w", "w", "wh", "wh", "wh" 2320 }, 2321 new String[]{"h", "k", "k", "k", "m", "n", "n", "n", "p", "p", "p", "p", "p", 2322 "r", "r", "r", "t", "t", "t", "w", "ng", "ng", "ng", "ng", "wh", "wh" 2323 }, 2324 new String[]{""}, 2325 new String[]{}, 2326 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{5, 5, 4, 2}, 0.2, 1.0, 0.0, 0.0, genericSanityChecks, true); 2327 } 2328 2329 /** 2330 * Imitation text from an approximation of the Maori language, spoken in New Zealand both today and historically, 2331 * and closely related to some other Polynesian 
languages. This version uses the current standard orthographic 2332 * standard of representing a long "a" with the letter "ā" (adding a macron diacritic). 2333 * <br> 2334 * Māuka whapi enāongupe worute, moa noepo? 2335 */ 2336 public static final FakeLanguageGen MAORI = maori().register("Maori"); 2337 2338 private static FakeLanguageGen spanish(){ 2339 return new FakeLanguageGen( 2340 new String[]{"a", "a", "a", "a", "a", "i", "i", "i", "o", "o", "o", "e", "e", "e", "e", "e", "u", "u"}, 2341 new String[]{"a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", 2342 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2343 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2344 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2345 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2346 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2347 "a", "a", "a", "a", "a", "a", "i", "i", "i", "i", "o", "o", "o", "o", "o", "e", "e", "e", "e", "e", 2348 "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua", 2349 "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua", 2350 "ai", "ai", "eo", "ia", "ia", "ie", "io", "iu", "oi", "ui", "ue", "ua", 2351 "ái", "aí", "éo", "ía", "iá", "íe", "ié", "ío", "íu", "oí", "uí", "ué", "uá", 2352 "á", "é", "í", "ó", "ú", "á", "é", "í", "ó",}, 2353 new String[]{"b", "c", "ch", "d", "f", "g", "gu", "h", "j", "l", "m", "n", "p", "qu", "r", "s", "t", "v", "z", 2354 "b", "s", "z", "r", "n", "h", "j", "j", "s", "c", "r", 2355 "b", "s", "z", "r", "n", "h", "j", "s", "c", "r", 2356 "b", "s", "r", "n", "h", "j", "s", "c", "r", 2357 "n", "s", "l", "c", "n", "s", "l", "c", 2358 "br", "gr", "fr" 2359 }, 2360 new String[]{"ñ", "rr", "ll", "ñ", "rr", "ll", 
"mb", "nd", "ng", "nqu", "rqu", "zqu", "zc", "rd", "rb", "rt", "rt", "rc", "sm", "sd"}, 2361 new String[]{"r", "n", "s", "s", "r", "n", "s", "s", "r", "n", "s", "s", "r", "n", "s", "s", 2362 "r", "n", "s", "r", "n", "s", "r", "n", "s", "r", "n", "s", 2363 }, 2364 new String[]{"on", "ez", "es", "es", "es", "es", "es", 2365 "ador", "edor", "ando", "endo", "indo", 2366 "ar", "as", "amos", "an", "oy", "ay", 2367 "er", "es", "emos", "en", "e", 2368 "ir", "es", "imos", "en", "io", 2369 "o", "a", "o", "a", "o", "a", "o", "a", "os", "as", "os", "as", "os", "as" 2370 }, 2371 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{4, 5, 3, 1}, 0.1, 1.0, 0.0, 0.3, genericSanityChecks, true) 2372 .addModifiers( 2373 new Modifier("([aeouáéóú])i$", "$1y"), 2374 new Modifier("([qQ])ua", "$1ue"), // guapo, agua, guano, all real Spanish, we should allow gua 2375 new Modifier("([qQ])uá", "$1ué"), 2376 new Modifier("([qgQG])u[ouy]", "$1ui"), 2377 new Modifier("([qgQG])u[óú]", "$1uí")); 2378 } 2379 2380 /** 2381 * Imitation text from an approximation of Spanish (not using the variations spoken in Spain, but closer to Latin 2382 * American forms of Spanish). This isn't as close as possible, but it abides by most of the orthographic rules that 2383 * Spanish uses. It uses the acute accent on the vowels á, é, í, ó, and ú, as well as the consonant ñ. 2384 * <br> 2385 * Jamos daí oñuezqui, luarbezquisdas canga ombiurta irri hoño resda! 
2386 */ 2387 public static final FakeLanguageGen SPANISH = spanish().register("Spanish"); 2388 2389 private static FakeLanguageGen deepSpeech(){ 2390 return new FakeLanguageGen( 2391 new String[]{ 2392 "a", "a", "o", "o", "o", "o", "u", "u", "u", "u", 2393 "a", "a", "o", "o", "o", "o", "u", "u", "u", "u", 2394 "a", "a", "o", "o", "o", "o", "u", "u", "u", "u", 2395 "a", "a", "o", "o", "o", "o", "u", "u", "u", "u", 2396 "a", "a", "o", "o", "o", "o", "u", "u", "u", "u", 2397 "aa", "aa", "oo", "oo", "oo", "oo", "uu", "uu", "uu", "uu", 2398 "aa", "aa", "oo", "oo", "oo", "oo", "uu", "uu", "uu", "uu", 2399 "ah", "ah", "oh", "oh", "oh", "oh", "uh", "uh", "uh", "uh", 2400 "aah", "ooh", "ooh", "uuh", "uuh", 2401 }, 2402 new String[]{}, 2403 new String[]{ 2404 "m", "ng", "r", "x", "y", "z", "v", "l", 2405 "m", "ng", "r", "x", "y", "z", "v", "l", 2406 "m", "ng", "r", "x", "y", "z", "v", "l", 2407 "m", "ng", "r", "x", "y", "z", "v", "l", 2408 "m", "ng", "r", "x", "y", "z", "v", "l", 2409 "m", "ng", "r", "z", "l", 2410 "m", "ng", "r", "z", "l", 2411 "m", "ng", "r", "z", "l", 2412 "m", "ng", "r", "z", "l", 2413 "mr", "vr", "ry", "zr", 2414 "mw", "vw", "ly", "zw", 2415 "zl", "vl" 2416 }, 2417 new String[]{ 2418 }, 2419 new String[]{ 2420 "m", "ng", "r", "x", "z", "v", "l", 2421 "m", "ng", "r", "x", "z", "v", "l", 2422 "m", "ng", "r", "x", "z", "v", "l", 2423 "m", "ng", "r", "x", "z", "v", "l", 2424 "rm", "rng", "rx", "rz", "rv", "rl", 2425 "lm", "lx", "lz", "lv", 2426 }, 2427 new String[]{}, 2428 new String[]{"'"}, new int[]{1, 2, 3, 4}, new double[]{3, 6, 5, 1}, 0.18, 0.25, 0.07, 0.0, null, true); 2429 } 2430 2431 /** 2432 * Fantasy/sci-fi language that would potentially be fitting for a trade language spoken by various very-different 2433 * groups, such as creatures with tentacled faces who need to communicate with spider-elves and living crystals. 
2434 * This language tries to use relatively few sounds so vocally-restricted species can speak it or approximate it, 2435 * but some of its sounds are uncommon. It uses "ng" as Vietnamese does, as a sound that can be approximated with 2436 * "w" but more accurately is like the sound at the end of "gong". It uses a breathy sound in many vowels, 2437 * represented by "h", and this is separate from (and can be combined with) lengthening the vowel by doubling it 2438 * ("a", "ah", "aa", and "aah" are different). The "x" sound can be approximated by any of the "kh" or "q" sounds 2439 * used in various human languages, or with its usage in English for "ks". This does separate some vowels with "'", 2440 * which can be a glottal stop as in Hawaiian or various other languages, or approximated with a brief pause. 2441 * <br> 2442 * Zrolmolurz, voluu, nguu yuh'ongohng! 2443 */ 2444 public static final FakeLanguageGen DEEP_SPEECH = deepSpeech().register("Deep Speech"); 2445 /** 2446 * Somewhat close to Old Norse, which is itself very close to Icelandic, but changed to avoid letters not on a 2447 * US-ASCII keyboard. Not to be confused with the language(s) of Norway, where the Norwegian languages are called 2448 * norsk, and are further distinguished into Bokmål and Nynorsk. This just applies {@link Modifier#SIMPLIFY_NORSE} 2449 * to {@link #NORSE}. This replaces eth ('Ðð') and thorn ('Þþ') with 'th' unless preceded by 's' (where 'sð' or 'sþ' 2450 * becomes "st") or followed by 'r' (where 'ðr' or 'þr' becomes 'fr'). It replaces 'Æ' or 'æ' with 'Ae' or 'ae', and 2451 * replaces 'Ö' or 'ö' with 'Ou' or "ou", which can change the length of a String relative to NORSE. It removes all 2452 * other accent marks (since the two-dot umlaut accent has already been changed, this only affects acute accents). 
2453 * It also changes some of the usage of "j" where it means the English "y" sound, making "fjord" into "fyord", which 2454 * is closer to familiar uses from East Asia like "Tokyo" and "Pyongyang". 2455 * <br> 2456 * Leyrk tyour stomri kna sno aed frepdapa, prygso? 2457 */ 2458 public static final FakeLanguageGen NORSE_SIMPLIFIED = norse().addModifiers(Modifier.SIMPLIFY_NORSE) 2459 .register("Norse Simplified"); 2460 2461 private static FakeLanguageGen hletkip(){ 2462 return new FakeLanguageGen( 2463 new String[]{"a", "a", "a", "e", "e", "e", "e", "e", "i", "i", "i", "i", 2464 "o", "o", "u", "u", "u", "u",}, 2465 new String[]{}, 2466 new String[]{ 2467 "hf", "hl", "hm", "hn", "hr", "hs", "hv", "hw", "hy", "hz", 2468 "br", "kr", "fr", "mr", "nr", "pr", "khr", "shr", "zhr", "sr", "vr", "thr", "zv", "zr", 2469 "by", "ky", "fy", "my", "ny", "py", "khy", "shy", "zhy", "ry", "sy", "vy", "thy", "zy", 2470 "bl", "kl", "fl", "ml", "nl", "pl", "khl", "shl", "zhl", "sl", "vl", "thl", "lw", "zl", 2471 "bf", "kf", "mf", "nf", "pf", "fsh", "shf", "fr", "sf", "fl", "fr", "fw", "fz", 2472 "bs", "ks", "fs", "ms", "ns", "ps", "skh", "shs", "khs", "shv","shw", 2473 "pkh", "psh", "pth", "pw", "tkh", "tsh", "tth", "tw", "sht", "bkh", "bsh", "bth", "bw", 2474 "dkh", "dth", "dw", "dzh", "khg", "shg", "thg", "gw", "zhg", "khk", "thk", "kw", 2475 }, 2476 new String[]{ 2477 "hf", "hl", "hm", "hn", "hr", "hs", "hv", "hw", "hy", "hz", 2478 "br", "kr", "fr", "mr", "nr", "pr", "khr", "shr", "zhr", "sr", "vr", "thr", "zv", "zr", 2479 "by", "ky", "fy", "my", "ny", "py", "khy", "shy", "zhy", "ry", "sy", "vy", "thy", "zy", 2480 "bl", "kl", "fl", "ml", "nl", "pl", "khl", "shl", "zhl", "sl", "vl", "thl", "lw", "zl", 2481 "bf", "kf", "mf", "nf", "pf", "fsh", "shf", "fr", "sf", "fl", "fr", "fw", "fz", 2482 "bs", "ks", "fs", "ms", "ns", "ps", "skh", "shs", "khs", "shv","shw", 2483 "pkh", "psh", "pth", "pw", "tkh", "tsh", "tth", "tw", "bkh", "bsh", "bth", "bw", 2484 "dkh", "dsh", "dth", "dw", "khg", 
"shg", "thg", "gw", "khk", "thk", "kw", 2485 "rb", "rk", "rf", "rm", "rn", "rp", "rkh", "rsh", "rzh", "rh", "rv", "rw", "rz", "rl", 2486 "lb", "lk", "lf", "lm", "ln", "lp", "lkh", "lsh", "lzh", "lh", "lv", "lw", "lz", "lr", 2487 "sb", "sk", "sf", "sm", "sn", "sp", "skh", "gsh", "dzh", "sh", "sv", "sw", "sz", "ts", "st", 2488 "mb", "md", "mk", "mf", "tm", "nm", "mp", "mkh", "msh", "mzh", "mh", "mv", "mw", "mt", "mz", 2489 "nb", "nd", "nk", "nf", "tn", "mn", "np", "nkh", "nsh", "nzh", "nh", "nv", "nw", "nt", "nz", 2490 "zb", "zd", "zk", "zf", "zt", "nz", "zp", "zkh", "zhz", "dz", "hz", "zv", "zw", "tz", 2491 }, 2492 new String[]{ 2493 }, 2494 new String[]{"ip", "ik", "id", "iz", "ir", "ikh", "ish", "is", "ith", "iv", "in", "im", "ib", "if", 2495 "ep", "ek", "ed", "ez", "er", "ekh", "esh", "es", "eth", "ev", "en", "em", "eb", "ef", 2496 "up", "ud", "uz", "ur", "ush", "us", "uth", "uv", "un", "um", "ub", "uf", 2497 }, 2498 new String[]{}, new int[]{1, 2, 3}, new double[]{1, 1, 1}, 0.0, 0.4, 0.0, 1.0, null, true); 2499 } 2500 2501 /** 2502 * A fictional language that could ostensibly be spoken by some group of humans, but that isn't closely based on any 2503 * one real-world language. It is meant to have a mix of hard and flowing sounds, roughly like Hebrew or Turkish, 2504 * but with a very different set of consonants and consonant blends. Importantly, consonant sounds are always paired 2505 * here except for the final consonant of a word, which is always one consonant sound if it is used at all. The 2506 * choices of consonant sounds are designed to be unusual, like "hl", "pkh", and "zhg" (which can all start a word). 2507 * <br> 2508 * Nyep khruv kwolbik psesh klulzhanbik psahzahwuth bluryup; hnish zhrim? 
2509 */ 2510 public static final FakeLanguageGen HLETKIP = hletkip().register("Hletkip"); 2511 2512 private static FakeLanguageGen ancientEgyptian(){ 2513 return new FakeLanguageGen( 2514 new String[]{"a", "a", "a", "a", "a", "aa", "e", "e", "e", "e", "e", "e", "e", "i", "i", "i", 2515 "u", "u", "u",}, 2516 new String[]{}, 2517 new String[]{ 2518 "b", 2519 "p", "p", "p", 2520 "f", "f", "f", "f", "f", 2521 "m", "m", "m", "m", "m", "m", 2522 "n", "n", "n", "n", "n", 2523 "r", "r", "r", "r", "r", "r", 2524 "h", "h", "h", "h", "h", "h", "h", "h", 2525 "kh", "kh", "kh", "kh", "kh", "kh", 2526 "z", 2527 "s", "s", "s", "s", "s", "s", "s", "s", 2528 "sh", "sh", "sh", "sh", 2529 "k", "k", "k", "k", "k", 2530 "g", "g", "g", 2531 "t", "t", "t", "t", "t", "t", 2532 "th", "th", "th", 2533 "d", "d", "d", 2534 "dj", 2535 "w", "w", "w", 2536 "pt" 2537 }, 2538 new String[]{ 2539 "b", 2540 "p", "p", "p", "pw", "pkh", "ps", "ps", "pt", 2541 "f", "f", "f", "f", "f", "ft", 2542 "m", "m", "m", "m", "m", "m", "mk", "nm", 2543 "n", "n", "n", "n", "n", "nkh", "nkh", "nk", "nt", "ns", 2544 "r", "r", "r", "r", "r", "r", "rs", "rt", 2545 "h", "h", "h", "h", "h", "h", "h", "h", 2546 "kh", "kh", "kh", "kh", "kh", "kh", "khm", "khm", "khw", 2547 "z", 2548 "s", "s", "s", "s", "s", "s", "s", "s", "st", "sk", "skh", 2549 "sh", "sh", "sh", "sh", "shw", 2550 "k", "k", "k", "k", "k", "kw", 2551 "g", "g", "g", 2552 "t", "t", "t", "t", "t", "t", "ts", 2553 "th", "th", "th", 2554 "d", "d", "d", "ds", 2555 "dj", 2556 "w", "w", "w", 2557 }, 2558 new String[]{ 2559 "m", "n", "t", "s", "p", "sh", "m", "n", "t", "s", "p", "sh", "m", "n", "t", "s", "p", "sh", 2560 "kh", "f" 2561 }, 2562 new String[]{"amon", "amun", "ut", "epsut", "is", "is", "ipsis", "akhti", "eftu", "atsut", "amses" 2563 }, 2564 new String[]{"-"}, new int[]{1, 2, 3, 4}, new double[]{4, 7, 3, 2}, 0.5, 0.4, 0.06, 0.09, null, true); 2565 } 2566 2567 /** 2568 * A (necessarily) very rough anglicization of Old Egyptian, a language that has no 
precisely known pronunciation 2569 * rules and was written with hieroglyphics. This is meant to serve as an analogue for any ancient language with few 2570 * contemporary speakers. 2571 * <br> 2572 * Thenamses upekha efe emesh nabasu ahakhepsut! 2573 */ 2574 // for future reference, consult https://en.wiktionary.org/wiki/Module:egy-pron-Egyptological 2575 public static final FakeLanguageGen ANCIENT_EGYPTIAN = ancientEgyptian().register("Ancient Egyptian"); 2576 2577 private static FakeLanguageGen crow(){ 2578 return new FakeLanguageGen( 2579 new String[]{"a", "a", "a", "a", "a","a", "a", "a","a", "a", "a", "á", "á", "aa", "aa", "áá", "áa", 2580 "e", "e", "e", "e", "e", "e", "ee", "ée", "é", "éé", 2581 "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "ii", "íí", "íi", "í", 2582 "o", "o", "o", "o", "o", "o", "o", "oo", "óó", "óo", "ó", 2583 "u", "u","u", "u","u", "u","u", "u", "u", "u", "uu", "úú", "úu", "ú", 2584 "ia", "ua", "ia", "ua", "ia", "ua", "ia", "ua", "ía", "úa" 2585 }, 2586 new String[]{ 2587 }, 2588 new String[]{ 2589 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2590 "k", "k", "m", "k", "k", "m", "d", "s"}, 2591 new String[]{ 2592 "bb", "pp", "ss", "kk", "ll", "mm", "nn", "dd", "tt", 2593 "kk", "kk", "mm", "kk", "kk", "mm", "dd", "ss", 2594 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2595 "k", "k", "m", "k", "k", "m", "d", "s", 2596 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2597 "k", "k", "m", "k", "k", "m", "d", "s", 2598 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2599 "k", "k", "m", "k", "k", "m", "d", "s", 2600 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2601 "k", "k", "m", "k", "k", "m", "d", "s", 2602 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2603 "k", "k", "m", "k", "k", "m", "d", "s", 2604 "b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2605 "k", "k", 
"m", "k", "k", "m", "d", "s" 2606 }, 2607 new String[]{"b", "p", "s", "x", "k", "l", "m", "n", "d", "t", "h", "w", "ch", "sh", 2608 "k", "k", "m", "k", "k", "m", "d", "s" 2609 }, 2610 new String[]{ 2611 }, 2612 new String[]{"-"}, new int[]{1, 2, 3, 4, 5}, new double[]{5, 7, 6, 4, 2}, 0.4, 1.0, 0.12, 0.0, null, true); 2613 } 2614 /** 2615 * A rough imitation of the Crow language of the American Midwest, using some tone marks. Some of the orthography 2616 * rules aren't clear across Internet information about the language, so this really is a "fake" language it will be 2617 * generating, not the real thing at all. This considers 'x' to be the rough back-of-throat noise that isn't in 2618 * English other than in loanwords, like the Scottish "loch," and in names like the German "Bach." Doubled (to use 2619 * the linguistic term, geminated) consonants are pronounced for a longer time, and doubled vowels with the same 2620 * accent mark or no accent mark are also lengthened. An un-accented vowel has a normal tone, an accented vowel has 2621 * a high tone, and an accented vowel followed by an un-accented vowel has a falling tone. This last feature is the 2622 * least common among languages here, and is a good way of distinguishing imitation Crow from other languages. 2623 * <br> 2624 * Pashu-umíkiki; chinébúlu ak kóokutú shu-eníí-a ipíimúu heekokáakoku? 
2625 */ 2626 public static final FakeLanguageGen CROW = crow().register("Crow"); 2627 2628 private static FakeLanguageGen imp(){ 2629 return new FakeLanguageGen( 2630 new String[]{"a", "a", "a", "a", "a", "á", "á", "á", "aa", "aa", "aa", "aaa", "aaa", "aaa", "áá", "áá", "ááá", "ááá", 2631 "e", "e", "e", "e", "e", "e", 2632 "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "í", "í", "í", "í", 2633 "ii", "ii", "ii", "iii", "iii", "iii", "íí", "íí", "ííí", "ííí", 2634 "u", "u", "u", "u", "u", "u", "u", "u", "ú", "ú", "ú", "uu", "uu", "uu", "úú", "úú", "úúú", "úúú", 2635 "ia", "ia", "ia", "ui", "ui" 2636 }, 2637 new String[]{ 2638 }, 2639 new String[]{ 2640 "s", "k", "d", "t", "h", "f", "g", "r", "r", "r", "r", "gh", "ch", 2641 "sk", "st", "skr", "str", "kr", "dr", "tr", "fr", "gr" 2642 }, 2643 new String[]{ 2644 "s", "k", "d", "t", "h", "f", "g", "r", "r", "r", "r", "gh", "ch", 2645 "sk", "st", "skr", "str", "kr", "dr", "tr", "fr", "gr" 2646 }, 2647 new String[]{ 2648 "s", "k", "d", "t", "g", "gh", "ch" 2649 }, 2650 new String[]{ 2651 }, 2652 new String[]{"-"}, new int[]{1, 2, 3}, new double[]{7, 11, 4}, 0.2, 0.5, 0.4, 0.0, null, true); 2653 } 2654 /** 2655 * A fantasy language meant for obnoxious screeching annoying enemies more-so than for intelligent friends or foes. 2656 * Uses accented vowels to mean "louder or higher-pitched" and up to three repeats of any vowel to lengthen it. 2657 * <br> 2658 * Siii-aghak fítríííg dú-úgh ru-úúk, grííírá! 
2659 */ 2660 public static final FakeLanguageGen IMP = imp().register("Imp"); 2661 2662 private static FakeLanguageGen malay(){ 2663 return new FakeLanguageGen( 2664 new String[]{ 2665 "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "ai", "ai", "au", 2666 "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", 2667 "i", "i", "i", "i", "i", "i", "i", "i", "ia", "ia", 2668 "o", "o", "o", "o", "o", "o", "ou", 2669 "u", "u", "u", "u", "u", "u", "u", "u", "u", "ua", "ua",}, 2670 new String[]{}, 2671 new String[]{ 2672 "b", "b", "b", "b", 2673 "ch", 2674 "d", "d", "d", "d", 2675 "f", 2676 "g", "g", 2677 "h", "h", 2678 "j", "j", "j", "j", 2679 "k", "k", "k", "k", "k", "k", 2680 "kh", 2681 "l", "l", "l", "l", "l", "l", "l", 2682 "m", "m", "m", "m", 2683 "n", "n", "n", 2684 "p", "p", "p", "p", "p", 2685 "r", "r", 2686 "s", "s", "s", "s", "s", 2687 "sh", "sh", 2688 "t", "t", "t", "t", 2689 "w", 2690 "y", 2691 "z", 2692 }, 2693 new String[]{ 2694 "b", "b", "b", "b", 2695 "ch", 2696 "d", "d", "d", "d", 2697 "f", 2698 "g", "g", 2699 "h", "h", "h", "h", "h", 2700 "j", "j", "j", 2701 "k", "k", "k", "k", "k", "k", "k", "k", "k", 2702 "kn", 2703 "kh", 2704 "l", "l", "l", "l", "l", "l", "l", 2705 "m", "m", "m", "m", "m", "m", 2706 "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", 2707 "nt", "nt", "nj", 2708 "ng", "ng", "ng", "ng", 2709 "ngk","ngg", 2710 "ny", "ny", 2711 "p", "p", "p", "p", "p", 2712 "r", "r", "r", "r", "r", "r", "r", "r", 2713 "rb", "rd", "rg", "rk", "rs", "rt", "rn", "rn", 2714 "s", "s", "s", "s", "s", "s", 2715 "sh", "sh", 2716 "t", "t", "t", "t", "t", "t", 2717 "w", 2718 "y", 2719 "z", 2720 }, 2721 new String[]{ 2722 "k", "k", "k", "k", "k", "k", "t", "t", "t", "n", "n", "n", "n", "n", "n", "n", "n", 2723 "ng", "ng", "ng", "m", "m", "m", "s", "s", "l", "l", "l", "l", "l", "h", "h" 2724 }, 2725 new String[]{"uk", "uk", "ok", "an", "at", "ul", "ang", "ih", "it", "is", "ung", "un", "ah" 2726 }, 2727 new String[]{}, new 
int[]{1, 2, 3}, new double[]{5, 3, 2}, 0.2, 0.25, 0.0, 0.2, genericSanityChecks, true); 2728 } 2729 2730 /** 2731 * An approximation of the Malay language or any of its close relatives, such as Indonesian. This differs from Malay 2732 * as it is normally written by using "ch" for what Malay writes as "c" (it is pronounced like the start of "chow"), 2733 * and "sh" for what Malay writes as "sy" (pronounced like the start of "shoe"). 2734 * <br> 2735 * Kashanyah satebok bisal bekain akinuk an as, penah lukul... 2736 */ 2737 public static final FakeLanguageGen MALAY = malay().register("Malay"); 2738 private static FakeLanguageGen celestial(){ 2739 return new FakeLanguageGen( 2740 new String[]{ 2741 "a", "a", "a", "a", "a", "a", "a", "e", "e", "e", "i", "i", "i", "i", "i", "o", "o", "o", 2742 "a", "a", "a", "a", "a", "a", "a", "e", "e", "e", "i", "i", "i", "i", "i", "o", "o", "o", 2743 "ă", "ă", "ĕ", "ĭ", "ŏ" 2744 }, 2745 new String[]{}, 2746 new String[]{ 2747 "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j", 2748 "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j", 2749 "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", "z", "h", "y", "w", "j", 2750 "n", "m", "v", "s", "z", "h", "y", "w", "j", 2751 "n", "m", "v", "s", "z", "h", "y", "w", "j", 2752 "n", "m", "s", "h", "y", "j", 2753 "n", "m", "s", "h", "y", "j", 2754 "n", "m", "s", "h", "y", "j", 2755 "h", "h", "h", "h", "h", "h", "h", "h", 2756 "m", "m", "m", "m", "m", "m", 2757 "ry", "ly", "by", "dy", "ny", "my", "vy", "by", "dy", "sy", "zy", 2758 "bl", "br", "dr", "shl", "shr", "hr" 2759 }, 2760 new String[]{ 2761 "j", "j", "j", 2762 "mh", "mb", "md", "mr", "ms", "mz", "mv", 2763 "nh", "nb", "nd", "nr", "ns", "nz", "nv", 2764 "zh", "zb", "zd", "zr", "zv", 2765 "bd", "db", "bm", "bn", "dm", "dn", 2766 "ry", "ly", "by", "dy", "ny", "my", "vy", "by", "dy", "sy", "zy", "wy", "jy", 2767 "bl", "br", "dr", "shl", "shr", "hr" 2768 }, 2769 new String[]{ 2770 "l", "r", "n", 
"m", "v", "b", "d", "s", "th", "sh", "z", 2771 "l", "r", "n", "m", "v", "b", "d", "s", "th", "sh", 2772 "l", "r", "n", "m", "v", "b", "d", "th", 2773 "l", "r", "n", "m", "b", "d", "th", 2774 "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m", "r", "n", "m", 2775 }, 2776 new String[]{ 2777 "am", "an", "ar", "av", "em", "el", "ez", "eth", "ev", "es", "im", "id", "in", "oth", "om", 2778 "ar", "el", "es", "im", "oth", 2779 "ăyom", "ĕzra", "ĭdniv", "ŏlor", "evyăd", "iyĕr", "abĭl", "onrŏv" 2780 }, 2781 new String[]{"'"}, new int[]{1, 2, 3}, new double[]{5, 6, 2}, 0.45, 0.1, 0.04, 0.14, genericSanityChecks, true); 2782 } 2783 2784 /** 2785 * Fantasy language that is meant to sound like it could be spoken by divine or (magical) otherworldly beings. 2786 * Sometimes uses the breve mark (as in {@code ăĕĭŏ}) over vowels and rarely splits consonants with {@code '}. 2787 * Uses very few harsh sounds, and may be easy to confuse with {@link #ELF} (this tends to use much shorter words). 2788 * This happens to sound a little like Hebrew, but since this doesn't have some consonants that are commonly used in 2789 * Hebrew, and because this uses accented vowels that aren't in Hebrew, they should be different enough that this 2790 * language can seem "not of this world." 2791 * <br> 2792 * Emŏl ebin hanzi'ab, isharar omrihrel nevyăd. 
2793 */ 2794 public static final FakeLanguageGen CELESTIAL = celestial().register("Celestial"); 2795 2796 private static FakeLanguageGen chinese(){ 2797 return new FakeLanguageGen( 2798 new String[]{ 2799 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", "yū", "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", "yú", 2800 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", "yǔ", "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", "yù", 2801 "a", "e", "i", "o", "u", "a", "i", "o", "u", "yu", "a", "e", "i", "o", "u", "a", "i", "o", "u", "yu", 2802 }, 2803 new String[]{ 2804 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2805 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2806 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2807 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2808 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2809 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2810 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2811 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2812 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2813 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2814 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2815 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2816 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2817 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2818 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2819 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2820 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2821 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2822 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2823 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2824 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2825 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2826 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2827 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2828 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2829 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2830 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2831 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2832 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 
2833 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2834 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2835 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2836 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2837 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2838 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2839 "ā", "ē", "ī", "ō", "ū", "ā", "ī", "ō", "ū", 2840 "á", "é", "í", "ó", "ú", "á", "í", "ó", "ú", 2841 "ǎ", "ě", "ǐ", "ǒ", "ǔ", "ǎ", "ǐ", "ǒ", "ǔ", 2842 "à", "è", "ì", "ò", "ù", "à", "ì", "ò", "ù", 2843 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2844 2845 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2846 "a", "e", "i", "o", "u", "a", "i", "o", "u", 2847 2848 "āí", "āó", "āú", "ēá", "īá", "īú", "ōá", "ūá", "ūé", 2849 "āǐ", "āǒ", "āǔ", "ēǎ", "īǎ", "īǔ", "ōǎ", "ūǎ", "ūě", 2850 "āì", "āò", "āù", "ēà", "īà", "īù", "ōà", "ūà", "ūè", 2851 "āi", "āo", "āu", "ēa", "īa", "īu", "ōa", "ūa", "ūe", 2852 2853 "áī", "áō", "áū", "éā", "íā", "íū", "óā", "úā", "úē", 2854 "áǐ", "áǒ", "áǔ", "éǎ", "íǎ", "íǔ", "óǎ", "ǔǎ", "ǔě", 2855 "áì", "áò", "áù", "éà", "íà", "íù", "óà", "ùà", "ùè", 2856 "ái", "áo", "áu", "éa", "ía", "íu", "óa", "ua", "ue", 2857 2858 "ǎī", "ǎō", "ǎū", "ěā", "ǐā", "ǐū", "ǒā", "ǔā", "ǔē", 2859 "ǎí", "ǎó", "ǎú", "ěá", "ǐá", "ǐú", "ǒá", "ǔá", "ǔé", 2860 "ǎì", "ǎò", "ǎù", "ěà", "ǐà", "ǐù", "ǒà", "ǔà", "ǔè", 2861 "ǎi", "ǎo", "ǎu", "ěa", "ǐa", "ǐu", "ǒa", "ǔa", "ǔe", 2862 2863 "àī", "àō", "àū", "èā", "ìā", "ìū", "òā", "ùā", "ùē", 2864 "àí", "àó", "àú", "èá", "ìá", "ìú", "òá", "ùá", "ùé", 2865 "àǐ", "àǒ", "àǔ", "èǎ", "ìǎ", "ìǔ", "òǎ", "ùǎ", "ùě", 2866 "ài", "ào", "àu", "èa", "ìa", "ìu", "òa", "ùa", "ùe", 2867 2868 "aī", "aō", "aū", "eā", "iā", "iū", "oā", "uā", "uē", 2869 "aí", "aó", "aú", "eá", "iá", "iú", "oá", "uá", "ué", 2870 "aǐ", "aǒ", "aǔ", "eǎ", "iǎ", "iǔ", "oǎ", "uǎ", "uě", 2871 "aì", "aò", "aù", "eà", "ià", "iù", "oà", "uà", "uè", 2872 2873 "yū", "yú", "yū", "yú", "yū", "yú", 2874 "yǔ", "yù", "yǔ", "yù", "yǔ", "yù", 2875 "yu", "yu", "yu", "yu", "yu", "yu", 2876 }, 
2877 new String[]{ 2878 "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", 2879 "zh", "ch", "sh", "r", "z", "ts", "s", 2880 2881 "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2882 "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2883 "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2884 "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2885 }, 2886 new String[]{ 2887 "nb", "np", "nf", "nd", "nt", "nl", "ng", "nk", "nj", "nq", "nx", "nzh", "nch", "nsh", "nz", "nts", "ns", 2888 "nb", "np", "nf", "nd", "nt", "nl", "ng", "nk", "nj", "nq", "nx", "nzh", "nch", "nsh", "nz", "nts", "ns", 2889 2890 "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "r", "z", "ts", "s", 2891 2892 "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2893 "b", "p", "m", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2894 "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2895 "d", "t", "g", "k", "j", "q", "x", "zh", "ch", "sh", "z", "ts", "s", 2896 }, 2897 new String[]{ 2898 "n", "n", "n", "n", "n", "n", "n", 2899 "ng", "ng", "ng", "ng", "ng", "ng", 2900 "r", "r", "r", 2901 }, 2902 new String[]{}, 2903 new String[]{}, new int[]{1, 2, 3}, new double[]{14, 3, 1}, 0.175, 0.55, 0.0, 0.0, genericSanityChecks, true); 2904 } 2905 2906 /** 2907 * An approximation of Hanyu Pinyin, a Romanization technique used for Mandarin Chinese that has been in common use 2908 * since the 1980s. This makes some slight changes so the vulgarity filters this uses can understand how some 2909 * letters sound; Pinyin's letter c becomes ts, and this replaces the u with umlaut, ü, in all cases with yu. 2910 * <br> 2911 * Tuàn tiāzhǎn dér, ǔngínbǔng xōr shàū kán nu tsīn. 
2912 */ 2913 public static final FakeLanguageGen CHINESE_ROMANIZED = chinese().register("Chinese Romanized"); 2914 2915 private static FakeLanguageGen cherokee(){ 2916 return new FakeLanguageGen( 2917 new String[]{ 2918 "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", 2919 "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", "a", "e", "i", "o", "u", "ü", 2920 "ai", "au", "oa", "oi", "ai", "au", "oa", "oi", 2921 "a", "a", "a", "a", "a", "a", "a", "a", "a", 2922 "ah", "ah", "ah", "ah", "ah", "ah", "ah", 2923 }, 2924 new String[]{ 2925 }, 2926 new String[]{ 2927 "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y", 2928 "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y", 2929 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2930 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2931 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2932 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2933 "g", "h", "n", "qu", "s", "d", "t", 2934 "g", "h", "n", "qu", "s", "d", "t", 2935 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2936 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2937 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2938 }, 2939 new String[]{ 2940 "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y", 2941 "g", "k", "h", "l", "n", "qu", "s", "d", "t", "dl", "ts", "w", "y", 2942 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2943 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2944 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2945 "g", "h", "l", "n", "qu", "s", "d", "t", "ts", "y", 2946 "g", "h", "n", "qu", "s", "d", "t", 2947 "g", "h", "n", "qu", "s", "d", "t", 2948 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2949 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2950 "h", "n", "s", "d", "t", "h", "n", "s", "d", "t", 2951 "sn", "sn", "st", "st", "squ", "squ", 2952 "th", "kh", "sh", "th", "kh", "sh", "th", "kh", "sh", 2953 "th", "sh", 
"th", "sh", "th", "sh", "th", "sh", 2954 }, 2955 new String[]{ 2956 "s" 2957 }, 2958 new String[]{ 2959 }, 2960 new String[]{}, new int[]{1, 2, 3, 4}, new double[]{4, 7, 6, 2}, 0.3, 0.96, 0.0, 0.0, null, true); 2961 } 2962 /** 2963 * A rough imitation of the Cherokee language, using an attempt at romanizing the syllabary the language is often 2964 * written with, using only the parts of the language that are usually written down. Some of the orthography 2965 * rules aren't clear across Internet information about the language, so this really is a "fake" language it will be 2966 * generating, not the real thing at all. The vowel 'ü' is used in place of the 'v' that the normal transliteration 2967 * uses, to help with profanity-checking what this generates; it is pronounced like in the French word "un". 2968 * <br> 2969 * Dah utugü tsahnahsütoi gohü usütahdi asi tsau dah tashi. 2970 */ 2971 public static final FakeLanguageGen CHEROKEE_ROMANIZED = cherokee().register("Cherokee Romanized"); 2972 2973 private static FakeLanguageGen vietnamese() { 2974 return new FakeLanguageGen(new String[]{ 2975 "a", "à", "á", "â", "ä", "ā", "ă", 2976 "e", "è", "é", "ê", "ë", "ē", "ĕ", 2977 "i", "ì", "í", "î", "ï", "ī", "ĭ", 2978 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 2979 "u", "ù", "ú", "û", "ü", "ū", "ŭ", 2980 }, 2981 new String[]{ 2982 "a", "à", "á", "â", "ä", "ā", "ă", 2983 "a", "à", "á", "â", "ä", "ā", "ă", 2984 "a", "à", "á", "â", "ä", "ā", "ă", 2985 "a", "à", "á", "â", "ä", "ā", "ă", 2986 "e", "è", "é", "ê", "ë", "ē", "ĕ", 2987 "i", "ì", "í", "î", "ï", "ī", "ĭ", 2988 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 2989 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 2990 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 2991 "u", "ù", "ú", "û", "ü", "ū", "ŭ", 2992 2993 "a", "à", "á", "â", "ä", "ā", "ă", 2994 "a", "à", "á", "â", "ä", "ā", "ă", 2995 "a", "à", "á", "â", "ä", "ā", "ă", 2996 "a", "à", "á", "â", "ä", "ā", "ă", 2997 "e", "è", "é", "ê", "ë", "ē", "ĕ", 2998 "i", "ì", "í", "î", "ï", "ī", "ĭ", 2999 "o", "ò", "ó", "ô", 
"ö", "ō", "ŏ", 3000 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 3001 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 3002 "u", "ù", "ú", "û", "ü", "ū", "ŭ", 3003 3004 "a", "à", "á", "â", "ä", "ā", "ă", 3005 "a", "à", "á", "â", "ä", "ā", "ă", 3006 "a", "à", "á", "â", "ä", "ā", "ă", 3007 "a", "à", "á", "â", "ä", "ā", "ă", 3008 "e", "è", "é", "ê", "ë", "ē", "ĕ", 3009 "i", "ì", "í", "î", "ï", "ī", "ĭ", 3010 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 3011 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 3012 "o", "ò", "ó", "ô", "ö", "ō", "ŏ", 3013 "u", "ù", "ú", "û", "ü", "ū", "ŭ", 3014 3015 "ua", "uà", "uá", "uâ", "uä", "uā", "uă", 3016 "ie", "iè", "ié", "iê", "ië", "iē", "iĕ", 3017 "ie", "iè", "ié", "iê", "ië", "iē", "iĕ", 3018 "ie", "ìe", "íe", "îe", "ïe", "īe", "ĭe", 3019 "iu", "ìu", "íu", "îu", "ïu", "īu", "ĭu", 3020 "oi", "òi", "ói", "ôi", "öi", "ōi", "ŏi", 3021 "uo", "ùo", "úo", "ûo", "üo", "ūo", "ŭo", 3022 "uo", "ùo", "úo", "ûo", "üo", "ūo", "ŭo", 3023 3024 "y", "y", "y", "y", "y", "y", "y", 3025 "ye", "yè", "yé", "yê", "yë", "yē", "yĕ", 3026 }, 3027 new String[]{ 3028 "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "l", "m", "n", "ng", "nh", "p", "ph", "qu", "r", 3029 "s", "t", "th", "tr", "v", "x", 3030 "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3031 "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3032 "b", "c", "d", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3033 "b", "c", "d", "l", "n", "ng", "p", "ph", "th", "tr", 3034 "b", "c", "d", "l", "n", "ng", "p", "ph", "th", "tr", 3035 "b", "c", "d", "l", "n", "ng", "p", 3036 "b", "c", "d", "l", "n", "ng", "p", 3037 "b", "c", "d", "l", "n", "ng", "p", 3038 }, new String[]{ 3039 "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "l", "m", "n", "ng", "nh", "p", "ph", "qu", "r", 3040 "s", "t", "th", "tr", "v", "x", 3041 "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3042 "b", "c", "d", "ð", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3043 
"b", "c", "d", "h", "l", "m", "n", "ng", "p", "ph", "t", "th", "tr", "v", 3044 "b", "c", "d", "l", "n", "ng", "p", "ph", "t", "th", "tr", 3045 "b", "c", "d", "l", "n", "ng", "p", "ph", "t", "th", "tr", 3046 "b", "c", "d", "l", "n", "ng", "p", "t", 3047 "b", "c", "d", "l", "n", "ng", "p", "t", 3048 "b", "c", "d", "l", "n", "ng", "p", 3049 }, 3050 new String[]{ 3051 "b", "c", "ch", "d", "ð", "g", "h", "k", "kh", "m", "m", "n", "ng", "nh", "p", "ch", "r", 3052 "s", "t", "x", 3053 "b", "c", "d", "ð", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c", 3054 "b", "c", "d", "ð", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c", 3055 "b", "c", "d", "h", "m", "m", "n", "ng", "p", "n", "t", "nh", "ng", "c", 3056 "b", "c", "d", "m", "n", "ng", "p", "n", "t", "nh", "ng", 3057 "b", "c", "d", "m", "n", "ng", "p", "n", "t", "nh", "ng", 3058 "b", "c", "d", "m", "n", "ng", "p", "t", 3059 "b", "c", "d", "m", "n", "ng", "p", "t", 3060 "b", "c", "d", "m", "n", "ng", "p", 3061 }, new String[]{}, new String[]{}, new int[]{1, 2, 3}, new double[]{37.0, 3.0, 1.0}, 3062 0.04, 0.4, 0.0, 0.0, genericSanityChecks, true); 3063 } 3064 /** 3065 * A very rough imitation of the Vietnamese language, without using the accurate characters Vietnamese really uses 3066 * but that are rare in fonts. Since so many letters in correct Vietnamese aren't available in most fonts, this 3067 * can't represent most of the accented vowels in the language, but it tries, with 6 accents for each of a, e, i, o, 3068 * and u, though none for y. It also uses 'ð' from Icelandic in place of the correct d with bar. This could also 3069 * maybe be used as an approximation of (badly) Romanized Thai, since Thai normally uses its own script but also has 3070 * many tones (which would be indicated by the accents here). 3071 * <br> 3072 * Bach trich, nŏ ngiukh nga cä tran ngonh... 
*/
    public static final FakeLanguageGen VIETNAMESE = vietnamese().register("Vietnamese");

    /**
     * An array that stores all the hand-made FakeLanguageGen constants; it does not store randomly-generated languages
     * nor does it store modifications or mixes of languages. The order these are stored in is related to the numeric
     * codes for languages in the {@link #serializeToString()} output, but neither is dependent on the other if this
     * array is changed for some reason (which is not recommended, but not out of the question). If this is modified,
     * then it is probably a bad idea to assign null to any elements in registered; special care is taken to avoid null
     * elements in its original state, so some code may rely on the items being usable and non-null.
     */
    public static final FakeLanguageGen[] registered;
    /**
     * The registry keys (names) of the languages in {@link #registered}, in the same order; the static initializer
     * below fills {@code registeredNames[i]} and {@code registered[i]} from the same registry position.
     */
    public static final String[] registeredNames;
    static {
        // the first item in registry is null so it can be a placeholder for random languages; we want to skip it.
        registered = new FakeLanguageGen[registry.size()-1];
        registeredNames = new String[registered.length];
        for (int i = 0; i < registered.length; i++) {
            // i+1 steps over the placeholder at registry position 0
            registeredNames[i] = registry.keyAt(i+1);
            registered[i] = registry.getAt(i+1);
        }
    }

    /**
     * If a FakeLanguageGen is known and is in {@link #registered}, this allows you to look up that FakeLanguageGen by
     * name (using a name from {@link #registeredNames}). The lookup is delegated to the internal registry, which is
     * constructed with a case-insensitive String hasher, so the name presumably does not need to match in case.
     * @param name a String name such as "English", "Korean Romanized", or "Russian Authentic"
     * @return a FakeLanguageGen corresponding to the given name, or null if none was found
     */
    public static FakeLanguageGen get(String name)
    {
        return registry.get(name);
    }
    /**
     * Looks up a FakeLanguageGen by its position in the internal registry (this calls {@code registry.getAt(index)}).
     * <br>
     * NOTE(review): this does not index {@link #registered} directly. The static initializer that fills
     * {@code registered[i]} reads {@code registry.getAt(i+1)}, skipping the registry's first entry (the placeholder
     * for random languages), so {@code getAt(i + 1)} is the call that matches {@code registered[i]}, and
     * {@code getAt(0)} gives the placeholder (null, per the comment in that static initializer). This method was
     * previously documented as taking an index from 0 to {@code FakeLanguageGen.registered.length - 1}; confirm what
     * callers expect before changing either the code or this text.
     * @param index an int position in the registry; 0 is the random-language placeholder, and 1 through
     *              {@code FakeLanguageGen.registered.length} line up with the elements of {@link #registered}
     * @return the FakeLanguageGen at that registry position; presumably null if the index is out of range, but that
     *         depends on OrderedMap.getAt, which is not visible here
     */
    public static FakeLanguageGen getAt(int index)
    {
        return registry.getAt(index);
    }
    /**
     * Looks up the name (registry key) at a position in the internal registry (this calls
     * {@code registry.keyAt(index)}).
     * <br>
     * NOTE(review): as with {@link #getAt(int)}, the position is a registry position, not an index into
     * {@link #registeredNames}; the static initializer fills {@code registeredNames[i]} from
     * {@code registry.keyAt(i+1)}, so {@code nameAt(i + 1)} is the call that matches {@code registeredNames[i]}.
     * @param index an int position in the registry; 1 through {@code FakeLanguageGen.registeredNames.length} line up
     *              with the elements of {@link #registeredNames}
     * @return the String name stored at that registry position; presumably null if the index is out of range, but
     *         that depends on OrderedMap.keyAt, which is not visible here
     */
    public static String nameAt(int index)
    {
        return registry.keyAt(index);
    }

    /**
     * FakeLanguageGen constants that are meant to sound like specific real-world languages, and that all use the Latin
     * script (like English) with maybe some accents.
     */
    public static final FakeLanguageGen[] romanizedHumanLanguages = {
            ENGLISH, KOREAN_ROMANIZED, SPANISH, SWAHILI, NORSE_SIMPLIFIED, ARABIC_ROMANIZED, HINDI_ROMANIZED, FRENCH,
            MAORI, GREEK_ROMANIZED, INUKTITUT, RUSSIAN_ROMANIZED, NAHUATL, JAPANESE_ROMANIZED, MONGOLIAN, SOMALI, CROW,
            ANCIENT_EGYPTIAN, MALAY, CHINESE_ROMANIZED, CHEROKEE_ROMANIZED, VIETNAMESE
    };

    /**
     * Zero-arg constructor for a FakeLanguageGen; produces a FakeLanguageGen equivalent to FakeLanguageGen.ENGLISH .
*/
    public FakeLanguageGen() {
        // Delegates to the 15-argument constructor; the labels below follow that constructor's parameter order.
        this(
                // openingVowels
                new String[]{
                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
                        "a", "a", "a", "a", "o", "o", "o", "e", "e", "e", "e", "e", "i", "i", "i", "i", "u",
                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
                        "a", "a", "a", "o", "o", "e", "e", "e", "i", "i", "i", "u",
                        "au", "ai", "ai", "ou", "ea", "ie", "io", "ei",
                },
                // midVowels
                new String[]{"u", "u", "oa", "oo", "oo", "oo", "ee", "ee", "ee", "ee",},
                // openingConsonants
                new String[]{
                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gl", "gr", "h", "j", "k", "l", "m", "n",
                        "p", "pl", "pr", "qu", "r", "s", "sh", "sk", "st", "sp", "sl", "sm", "sn", "t", "tr", "th", "thr", "v", "w", "y", "z",
                        "b", "bl", "br", "c", "cl", "cr", "ch", "d", "dr", "f", "fl", "fr", "g", "gr", "h", "j", "k", "l", "m", "n",
                        "p", "pl", "pr", "r", "s", "sh", "st", "sp", "sl", "t", "tr", "th", "w", "y",
                        "b", "br", "c", "ch", "d", "dr", "f", "g", "h", "j", "l", "m", "n",
                        "p", "r", "s", "sh", "st", "sl", "t", "tr", "th",
                        "b", "d", "f", "g", "h", "l", "m", "n",
                        "p", "r", "s", "sh", "t", "th",
                        "b", "d", "f", "g", "h", "l", "m", "n",
                        "p", "r", "s", "sh", "t", "th",
                        "r", "s", "t", "l", "n",
                        "str", "spr", "spl", "wr", "kn", "kn", "gn",
                },
                // midConsonants
                new String[]{"x", "cst", "bs", "ff", "lg", "g", "gs",
                        "ll", "ltr", "mb", "mn", "mm", "ng", "ng", "ngl", "nt", "ns", "nn", "ps", "mbl", "mpr",
                        "pp", "ppl", "ppr", "rr", "rr", "rr", "rl", "rtn", "ngr", "ss", "sc", "rst", "tt", "tt", "ts", "ltr", "zz"
                },
                // closingConsonants
                new String[]{"b", "rb", "bb", "c", "rc", "ld", "d", "ds", "dd", "f", "ff", "lf", "rf", "rg", "gs", "ch", "lch", "rch", "tch",
                        "ck", "ck", "lk", "rk", "l", "ll", "lm", "m", "rm", "mp", "n", "nk", "nch", "nd", "ng", "ng", "nt", "ns", "lp", "rp",
                        "p", "r", "rn", "rts", "s", "s", "s", "s", "ss", "ss", "st", "ls", "t", "t", "ts", "w", "wn", "x", "ly", "lly", "z",
                        "b", "c", "d", "f", "g", "k", "l", "m", "n", "p", "r", "s", "t", "w",
                },
                // closingSyllables
                new String[]{"ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
                        "ate", "ite", "ism", "ist", "er", "er", "er", "ed", "ed", "ed", "es", "es", "ied", "y", "y", "y", "y",
                        "ay", "ay", "ey", "oy", "ay", "ay", "ey", "oy",
                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
                        "ough", "aught", "ant", "ont", "oe", "ance", "ell", "eal", "oa", "urt", "ut", "iom", "ion", "ion", "ision", "ation", "ation", "ition",
                        "ily", "ily", "ily", "adly", "owly", "oorly", "ardly", "iedly",
                },
                // vowelSplitters (none), syllableLengths, syllableFrequencies, then vowelStartFrequency,
                // vowelEndFrequency, vowelSplitFrequency, syllableEndFrequency, the sanity-check patterns, and clean
                new String[]{}, new int[]{1, 2, 3, 4}, new double[]{10, 11, 4, 1}, 0.22, 0.1, 0.0, 0.22, englishSanityChecks, true);
    }

    /**
     * This is a very complicated constructor! Maybe look at the calls to this to initialize static members of this
     * class, LOVECRAFT and GREEK_ROMANIZED.
     *
     * @param openingVowels        String array where each element is a vowel or group of vowels that may appear at the start
     *                             of a word or in the middle; elements may be repeated to make them more common
     * @param midVowels            String array where each element is a vowel or group of vowels that may appear in the
     *                             middle of the word; all openingVowels are automatically copied into this internally.
*                             Elements may be repeated to make them more common
     * @param openingConsonants    String array where each element is a consonant or consonant cluster that can appear
     *                             at the start of a word; elements may be repeated to make them more common
     * @param midConsonants        String array where each element is a consonant or consonant cluster that can appear
     *                             between vowels; all closingConsonants are automatically copied into this internally.
     *                             Elements may be repeated to make them more common
     * @param closingConsonants    String array where each element is a consonant or consonant cluster that can appear
     *                             at the end of a word; elements may be repeated to make them more common
     * @param closingSyllables     String array where each element is a syllable starting with a vowel and ending in
     *                             whatever the word should end in; elements may be repeated to make them more common
     * @param vowelSplitters       String array where each element is a mark that goes between vowels, so if "-" is in this,
     *                             then "a-a" may be possible; elements may be repeated to make them more common
     * @param syllableLengths      int array where each element is a possible number of syllables a word can use; closely
     *                             tied to syllableFrequencies
     * @param syllableFrequencies  double array where each element corresponds to an element in syllableLengths and
     *                             represents how often each syllable count should appear relative to other counts; there
     *                             is no need to restrict the numbers to add up to any other number
     * @param vowelStartFrequency  a double between 0.0 and 1.0 that determines how often words start with vowels;
     *                             higher numbers yield more words starting with vowels
     * @param vowelEndFrequency    a double between 0.0 and 1.0 that determines how often words end with vowels; higher
     *                             numbers yield more words ending in vowels
     * @param vowelSplitFrequency  a double between 0.0 and 1.0 that, if vowelSplitters is not empty,
determines how
     *                             often a vowel will be split into two vowels separated by one of those splitters
     * @param syllableEndFrequency a double between 0.0 and 1.0 that determines how often an element of
     *                             closingSyllables is used instead of ending normally
     */
    public FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants,
                           String[] midConsonants, String[] closingConsonants, String[] closingSyllables, String[] vowelSplitters,
                           int[] syllableLengths, double[] syllableFrequencies, double vowelStartFrequency,
                           double vowelEndFrequency, double vowelSplitFrequency, double syllableEndFrequency) {
        // Delegates to the 15-argument constructor, supplying englishSanityChecks as the sanity-check patterns
        // and true for clean.
        this(openingVowels, midVowels, openingConsonants, midConsonants, closingConsonants, closingSyllables,
                vowelSplitters, syllableLengths, syllableFrequencies, vowelStartFrequency, vowelEndFrequency,
                vowelSplitFrequency, syllableEndFrequency, englishSanityChecks, true);
    }

    /**
     * This is a very complicated constructor! Maybe look at the calls to this to initialize static members of this
     * class, LOVECRAFT and GREEK_ROMANIZED.
     *
     * @param openingVowels        String array where each element is a vowel or group of vowels that may appear at the start
     *                             of a word or in the middle; elements may be repeated to make them more common
     * @param midVowels            String array where each element is a vowel or group of vowels that may appear in the
     *                             middle of the word; all openingVowels are automatically copied into this internally.
     *                             Elements may be repeated to make them more common
     * @param openingConsonants    String array where each element is a consonant or consonant cluster that can appear
     *                             at the start of a word; elements may be repeated to make them more common
     * @param midConsonants        String array where each element is a consonant or consonant cluster that can appear
     *                             between vowels; all closingConsonants are automatically copied into this internally.
*                             Elements may be repeated to make them more common
     * @param closingConsonants    String array where each element is a consonant or consonant cluster that can appear
     *                             at the end of a word; elements may be repeated to make them more common
     * @param closingSyllables     String array where each element is a syllable starting with a vowel and ending in
     *                             whatever the word should end in; elements may be repeated to make them more common
     * @param vowelSplitters       String array where each element is a mark that goes between vowels, so if "-" is in this,
     *                             then "a-a" may be possible; elements may be repeated to make them more common
     * @param syllableLengths      int array where each element is a possible number of syllables a word can use; closely
     *                             tied to syllableFrequencies
     * @param syllableFrequencies  double array where each element corresponds to an element in syllableLengths and
     *                             represents how often each syllable count should appear relative to other counts; there
     *                             is no need to restrict the numbers to add up to any other number
     * @param vowelStartFrequency  a double between 0.0 and 1.0 that determines how often words start with vowels;
     *                             higher numbers yield more words starting with vowels
     * @param vowelEndFrequency    a double between 0.0 and 1.0 that determines how often words end with vowels; higher
     *                             numbers yield more words ending in vowels
     * @param vowelSplitFrequency  a double between 0.0 and 1.0 that, if vowelSplitters is not empty, determines how
     *                             often a vowel will be split into two vowels separated by one of those splitters
     * @param syllableEndFrequency a double between 0.0 and 1.0 that determines how often an element of
     *                             closingSyllables is used instead of ending normally
     * @param sane                 an array of RegExodus Pattern objects used as sanity checks for sounds that are
     *                             pronounce-able to most English speakers; a generated word that any of these
     *                             matches is replaced, which removes many words that are impossible to say, and
     *                             this may be null to skip the checks; slows down generation slightly, irrelevant
for non-Latin alphabets 3262 * @param clean true to perform vulgarity/obscenity checks on the word, replacing it if it is too close to a 3263 * common English vulgarity, obscenity, or slur/epithet; slows down generation slightly 3264 */ 3265 public FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants, 3266 String[] midConsonants, String[] closingConsonants, String[] closingSyllables, String[] vowelSplitters, 3267 int[] syllableLengths, double[] syllableFrequencies, double vowelStartFrequency, 3268 double vowelEndFrequency, double vowelSplitFrequency, double syllableEndFrequency, 3269 Pattern[] sane, boolean clean) { 3270 this.openingVowels = openingVowels; 3271 this.midVowels = new String[openingVowels.length + midVowels.length]; 3272 System.arraycopy(midVowels, 0, this.midVowels, 0, midVowels.length); 3273 System.arraycopy(openingVowels, 0, this.midVowels, midVowels.length, openingVowels.length); 3274 this.openingConsonants = openingConsonants; 3275 this.midConsonants = new String[midConsonants.length + closingConsonants.length]; 3276 System.arraycopy(midConsonants, 0, this.midConsonants, 0, midConsonants.length); 3277 System.arraycopy(closingConsonants, 0, this.midConsonants, midConsonants.length, closingConsonants.length); 3278 this.closingConsonants = closingConsonants; 3279 this.vowelSplitters = vowelSplitters; 3280 this.closingSyllables = closingSyllables; 3281 3282 this.syllableFrequencies = new double[syllableLengths[syllableLengths.length - 1]]; 3283 totalSyllableFrequency = 0.0; 3284 for (int i = 0; i < syllableLengths.length; i++) { 3285 totalSyllableFrequency += (this.syllableFrequencies[syllableLengths[i]-1] = syllableFrequencies[i]); 3286 } 3287 3288 if (vowelStartFrequency > 1.0) 3289 this.vowelStartFrequency = 1.0 / vowelStartFrequency; 3290 else 3291 this.vowelStartFrequency = vowelStartFrequency; 3292 if (vowelEndFrequency > 1.0) 3293 this.vowelEndFrequency = 1.0 / vowelEndFrequency; 3294 else 3295 
this.vowelEndFrequency = vowelEndFrequency; 3296 if (vowelSplitters.length == 0) 3297 this.vowelSplitFrequency = 0.0; 3298 else if (vowelSplitFrequency > 1.0) 3299 this.vowelSplitFrequency = 1.0 / vowelSplitFrequency; 3300 else 3301 this.vowelSplitFrequency = vowelSplitFrequency; 3302 if (closingSyllables.length == 0) 3303 this.syllableEndFrequency = 0.0; 3304 else if (syllableEndFrequency > 1.0) 3305 this.syllableEndFrequency = 1.0 / syllableEndFrequency; 3306 else 3307 this.syllableEndFrequency = syllableEndFrequency; 3308 this.clean = clean; 3309 sanityChecks = sane; 3310 modifiers = new ArrayList<>(4); 3311 } 3312 3313 private FakeLanguageGen(String[] openingVowels, String[] midVowels, String[] openingConsonants, 3314 String[] midConsonants, String[] closingConsonants, String[] closingSyllables, 3315 String[] vowelSplitters, double[] syllableFrequencies, 3316 double vowelStartFrequency, double vowelEndFrequency, double vowelSplitFrequency, 3317 double syllableEndFrequency, Pattern[] sanityChecks, boolean clean, 3318 List<Modifier> modifiers) { 3319 this.openingVowels = copyStrings(openingVowels); 3320 this.midVowels = copyStrings(midVowels); 3321 this.openingConsonants = copyStrings(openingConsonants); 3322 this.midConsonants = copyStrings(midConsonants); 3323 this.closingConsonants = copyStrings(closingConsonants); 3324 this.closingSyllables = copyStrings(closingSyllables); 3325 this.vowelSplitters = copyStrings(vowelSplitters); 3326 this.syllableFrequencies = Arrays.copyOf(syllableFrequencies, syllableFrequencies.length); 3327 this.vowelStartFrequency = vowelStartFrequency; 3328 this.vowelEndFrequency = vowelEndFrequency; 3329 this.vowelSplitFrequency = vowelSplitFrequency; 3330 this.syllableEndFrequency = syllableEndFrequency; 3331 for (int i = 0; i < syllableFrequencies.length; i++) { 3332 totalSyllableFrequency += syllableFrequencies[i]; 3333 } 3334 if (sanityChecks == null) 3335 this.sanityChecks = null; 3336 else { 3337 this.sanityChecks = new 
Pattern[sanityChecks.length]; 3338 System.arraycopy(sanityChecks, 0, this.sanityChecks, 0, sanityChecks.length); 3339 } 3340 this.clean = clean; 3341 this.modifiers = new ArrayList<>(modifiers); 3342 } 3343 3344 private static String[] processParts(OrderedMap<String, String> parts, Set<String> missingSounds, 3345 Set<String> forbidden, IRNG rng, double repeatSingleChance, 3346 int preferredLimit) { 3347 int l, sz = parts.size(); 3348 List<String> working = new ArrayList<>(sz * 24); 3349 String pair; 3350 for (int e = 0; e < parts.size(); e++) { 3351 Map.Entry<String, String> sn = parts.entryAt(e); 3352 if (missingSounds.contains(sn.getKey())) 3353 continue; 3354 for (String t : sn.getValue().split(" ")) { 3355 if (forbidden.contains(t)) 3356 continue; 3357 l = t.length(); 3358 int num; 3359 char c; 3360 switch (l) { 3361 case 0: 3362 break; 3363 case 1: 3364 working.add(t); 3365 working.add(t); 3366 working.add(t); 3367 c = t.charAt(0); 3368 num = 0; 3369 boolean repeat = true; 3370 switch (c) { 3371 case 'w': 3372 num += 2; 3373 case 'y': 3374 case 'h': 3375 num += 4; 3376 case 'q': 3377 case 'x': 3378 num += 4; 3379 repeat = false; 3380 break; 3381 case 'i': 3382 case 'u': 3383 repeat = false; 3384 num = 13; 3385 break; 3386 case 'z': 3387 case 'v': 3388 num = 4; 3389 break; 3390 case 'j': 3391 num = 7; 3392 break; 3393 default: 3394 if (e >= preferredLimit) 3395 num = 6; 3396 else 3397 num = 13; 3398 } 3399 for (int i = 0; i < num * 3; i++) { 3400 if (rng.nextDouble() < 0.75) { 3401 working.add(t); 3402 } 3403 } 3404 3405 if (repeat && rng.nextDouble() < repeatSingleChance) { 3406 pair = t + t; 3407 if (missingSounds.contains(pair)) 3408 continue; 3409 working.add(pair); 3410 working.add(pair); 3411 working.add(pair); 3412 if (rng.nextDouble() < 0.7) { 3413 working.add(pair); 3414 working.add(pair); 3415 } 3416 if (rng.nextDouble() < 0.7) { 3417 working.add(pair); 3418 } 3419 } 3420 3421 break; 3422 case 2: 3423 if (rng.nextDouble() < 0.65) { 3424 c = 
t.charAt(1); 3425 switch (c) { 3426 case 'z': 3427 num = 1; 3428 break; 3429 case 'w': 3430 num = 3; 3431 break; 3432 case 'n': 3433 num = 4; 3434 break; 3435 default: 3436 3437 if (e >= preferredLimit) 3438 num = 2; 3439 else 3440 num = 7; 3441 } 3442 working.add(t); 3443 for (int i = 0; i < num; i++) { 3444 if (rng.nextDouble() < 0.25) { 3445 working.add(t); 3446 } 3447 } 3448 } 3449 break; 3450 case 3: 3451 if (rng.nextDouble() < 0.5) { 3452 c = t.charAt(0); 3453 switch (c) { 3454 case 'z': 3455 num = 1; 3456 break; 3457 case 'w': 3458 num = 3; 3459 break; 3460 case 'n': 3461 num = 4; 3462 break; 3463 default: 3464 if (e >= preferredLimit) 3465 num = 2; 3466 else 3467 num = 6; 3468 } 3469 working.add(t); 3470 for (int i = 0; i < num; i++) { 3471 if (rng.nextDouble() < 0.2) { 3472 working.add(t); 3473 } 3474 } 3475 } 3476 break; 3477 default: 3478 if (rng.nextDouble() < 0.3 && (t.charAt(l - 1) != 'z' || rng.nextDouble() < 0.1)) { 3479 working.add(t); 3480 } 3481 break; 3482 } 3483 } 3484 } 3485 return working.toArray(new String[0]); 3486 } 3487 3488 /*private static final String[][] openVowels = new String[][]{ 3489 new String[]{"a", "a", "a", "a", "aa", "ae", "ai", "au", "ea", "ia", "oa", "ua",}, 3490 new String[]{"e", "e", "e", "e", "ae", "ea", "ee", "ei", "eo", "eu", "ie", "ue",}, 3491 new String[]{"i", "i", "i", "i", "ai", "ei", "ia", "ie", "io", "iu", "oi", "ui",}, 3492 new String[]{"o", "o", "o", "o", "eo", "io", "oa", "oi", "oo", "ou",}, 3493 new String[]{"u", "u", "u", "u", "au", "eu", "iu", "ou", "ua", "ue", "ui",}, 3494 }; 3495*/ 3496 3497 public static FakeLanguageGen randomLanguage(IRNG rng) { 3498 return randomLanguage(rng.nextLong()); 3499 } 3500 3501 public static FakeLanguageGen randomLanguage(long seed) { 3502 GWTRNG rng = new GWTRNG(seed); 3503 int[] lengths = new int[rng.between(3, 5)]; 3504 System.arraycopy(new int[]{1, 2, 3, 4}, 0, lengths, 0, lengths.length); 3505 double[] chances = new double[lengths.length]; 3506 System.arraycopy(new 
double[]{ 3507 5 + rng.nextDouble(4), 13 + rng.nextDouble(9), 3 + rng.nextDouble(3), 1 + rng.nextDouble(2) 3508 }, 0, chances, 0, chances.length); 3509 double vowelHeavy = rng.between(0.2, 0.5), removalRate = rng.between(0.15, 0.65); 3510 int sz = openCons.size(); 3511 int[] reordering = rng.randomOrdering(sz), vOrd = rng.randomOrdering(openVowels.size()); 3512 OrderedMap<String, String> 3513 parts0 = new OrderedMap<>(openVowels), 3514 parts1 = new OrderedMap<>(openCons), 3515 parts2 = new OrderedMap<>(midCons), 3516 parts3 = new OrderedMap<>(closeCons); 3517 OrderedSet<String> forbidden = new OrderedSet<>(1024, 0.25f), missingSounds = new OrderedSet<>(64, 0.875f); 3518 parts1.reorder(reordering); 3519 parts2.reorder(reordering); 3520 parts3.reorder(reordering); 3521 parts0.reorder(vOrd); 3522 int n; 3523 3524 int mn = Math.min(rng.nextInt(3), rng.nextInt(3)), sz0, p0s; 3525 3526 for (n = 0; n < mn; n++) { 3527 missingSounds.add(parts0.keyAt(0)); 3528 Collections.addAll(forbidden, parts0.getAt(0).split(" ")); 3529 parts0.removeFirst(); 3530 } 3531 p0s = parts0.size(); 3532 sz0 = Math.max(rng.between(1, p0s + 1), rng.between(1, p0s + 1)); 3533 char[] nextAccents = new char[sz0], unaccented = new char[sz0]; 3534 int vowelAccent = rng.between(1, 7); 3535 for (int i = 0; i < sz0; i++) { 3536 nextAccents[i] = accentedVowels[vOrd[i + mn]][vowelAccent]; 3537 unaccented[i] = accentedVowels[vOrd[i + mn]][0]; 3538 } 3539 if (rng.nextDouble() < 0.8) { 3540 for (int i = 0; i < sz0; i++) { 3541 char ac = nextAccents[i], ua = unaccented[i]; 3542 String v = "", uas = String.valueOf(ua); 3543 Pattern pat = Pattern.compile("\\b([aeiou]*)(" + ua + ")([aeiou]*)\\b"); 3544 Replacer rep = pat.replacer("$1$2$3 $1" + ac + "$3"), repLess = pat.replacer("$1" + ac + "$3"); 3545 for (int j = 0; j < p0s; j++) { 3546 String k = parts0.keyAt(j); 3547 if (uas.equals(k)) // uas is never null, always length 1 3548 v = parts0.getAt(j); 3549 else { 3550 String current = parts0.getAt(j); 3551 
String[] splits = current.split(" "); 3552 for (int s = 0; s < splits.length; s++) { 3553 if (forbidden.contains(uas) && splits[s].contains(uas)) 3554 forbidden.add(splits[s].replace(ua, ac)); 3555 } 3556 parts0.put(k, rep.replace(current)); 3557 } 3558 } 3559 parts0.put(String.valueOf(ac), repLess.replace(v)); 3560 } 3561 } 3562 3563 n = 0; 3564 if (rng.nextDouble() < 0.75) { 3565 missingSounds.add("z"); 3566 Collections.addAll(forbidden, parts1.get("z").split(" ")); 3567 Collections.addAll(forbidden, parts2.get("z").split(" ")); 3568 Collections.addAll(forbidden, parts3.get("z").split(" ")); 3569 n++; 3570 } 3571 if (rng.nextDouble() < 0.82) { 3572 missingSounds.add("x"); 3573 Collections.addAll(forbidden, parts1.get("x").split(" ")); 3574 Collections.addAll(forbidden, parts2.get("x").split(" ")); 3575 Collections.addAll(forbidden, parts3.get("x").split(" ")); 3576 n++; 3577 } 3578 if (rng.nextDouble() < 0.92) { 3579 missingSounds.add("qu"); 3580 Collections.addAll(forbidden, parts1.get("qu").split(" ")); 3581 Collections.addAll(forbidden, parts2.get("qu").split(" ")); 3582 Collections.addAll(forbidden, parts3.get("qu").split(" ")); 3583 n++; 3584 } 3585 if (rng.nextDouble() < 0.96) { 3586 missingSounds.add("q"); 3587 Collections.addAll(forbidden, parts1.get("q").split(" ")); 3588 Collections.addAll(forbidden, parts2.get("q").split(" ")); 3589 Collections.addAll(forbidden, parts3.get("q").split(" ")); 3590 n++; 3591 } 3592 if (rng.nextDouble() < 0.97) { 3593 missingSounds.add("tl"); 3594 Collections.addAll(forbidden, parts1.get("tl").split(" ")); 3595 Collections.addAll(forbidden, parts2.get("tl").split(" ")); 3596 Collections.addAll(forbidden, parts3.get("tl").split(" ")); 3597 n++; 3598 } 3599 if (rng.nextDouble() < 0.86) { 3600 missingSounds.add("ph"); 3601 Collections.addAll(forbidden, parts1.get("ph").split(" ")); 3602 Collections.addAll(forbidden, parts2.get("ph").split(" ")); 3603 Collections.addAll(forbidden, parts3.get("ph").split(" ")); 3604 n++; 3605 } 
3606 if (rng.nextDouble() < 0.94) { 3607 missingSounds.add("kh"); 3608 Collections.addAll(forbidden, parts1.get("kh").split(" ")); 3609 Collections.addAll(forbidden, parts2.get("kh").split(" ")); 3610 Collections.addAll(forbidden, parts3.get("kh").split(" ")); 3611 n++; 3612 } 3613 if (rng.nextDouble() < 0.96) { 3614 missingSounds.add("bh"); 3615 missingSounds.add("dh"); 3616 Collections.addAll(forbidden, parts1.get("bh").split(" ")); 3617 Collections.addAll(forbidden, parts2.get("bh").split(" ")); 3618 Collections.addAll(forbidden, parts3.get("bh").split(" ")); 3619 Collections.addAll(forbidden, parts1.get("dh").split(" ")); 3620 Collections.addAll(forbidden, parts2.get("dh").split(" ")); 3621 Collections.addAll(forbidden, parts3.get("dh").split(" ")); 3622 n++; 3623 n++; 3624 } 3625 3626 for (; n < sz * removalRate; n++) { 3627 missingSounds.add(parts1.keyAt(n)); 3628 missingSounds.add(parts2.keyAt(n)); 3629 missingSounds.add(parts3.keyAt(n)); 3630 Collections.addAll(forbidden, parts1.getAt(n).split(" ")); 3631 Collections.addAll(forbidden, parts2.getAt(n).split(" ")); 3632 Collections.addAll(forbidden, parts3.getAt(n).split(" ")); 3633 } 3634 3635 return new FakeLanguageGen( 3636 processParts(parts0, missingSounds, forbidden, rng, 0.0, p0s), 3637 new String[]{}, 3638 processParts(openCons, missingSounds, forbidden, rng, 0.0, 4096), 3639 processParts(midCons, missingSounds, forbidden, rng, (rng.nextDouble() * 3 - 0.75) * 0.4444, 4096), 3640 processParts(closeCons, missingSounds, forbidden, rng, (rng.nextDouble() * 3 - 0.75) * 0.2857, 4096), 3641 new String[]{}, 3642 new String[]{}, lengths, chances, vowelHeavy, vowelHeavy * 1.8, 0.0, 0.0, genericSanityChecks, true).summarize("0#" + seed + "@1"); 3643 } 3644 3645 protected static boolean checkAll(CharSequence testing, Pattern[] checks) { 3646 CharSequence fixed = removeAccents(testing); 3647 for (int i = 0; i < checks.length; i++) { 3648 if (checks[i].matcher(fixed).find()) 3649 return false; 3650 } 3651 return 
true; 3652 } 3653 3654 /** 3655 * Checks a CharSequence, such as a String, against an overzealous vulgarity filter, returning true if the text 3656 * could contain vulgar elements or words that could seem vulgar or juvenile. The idea here is that false positives 3657 * are OK as long as there are very few false negatives (missed vulgar words). Does not check punctuation or numbers 3658 * that could look like letters. 3659 * @param testing the text, as a CharSequence such as a String, to check 3660 * @return true if the text could contain a vulgar or juvenile element; false if it probably doesn't 3661 */ 3662 public static boolean checkVulgarity(CharSequence testing) 3663 { 3664 CharSequence fixed = removeAccents(testing); 3665 for (int i = 0; i < vulgarChecks.length; i++) { 3666 if (vulgarChecks[i].matcher(fixed).find()) 3667 { 3668// System.out.println(vulgarChecks[i]); 3669 return true; 3670 } 3671 } 3672 return false; 3673 } 3674 3675 /** 3676 * Generate a word from this FakeLanguageGen, using and changing the current seed. 3677 * 3678 * @param capitalize true if the word should start with a capital letter, false otherwise 3679 * @return a word in the fake language as a String 3680 */ 3681 public String word(boolean capitalize) { 3682 return word(srng, capitalize); 3683 } 3684 3685 /** 3686 * Generate a word from this FakeLanguageGen using the specified long seed to use for a shared StatefulRNG. 3687 * If seed is the same, a FakeLanguageGen should produce the same word every time with this method. 3688 * 3689 * @param seed the seed, as a long, to use for the randomized string building 3690 * @param capitalize true if the word should start with a capital letter, false otherwise 3691 * @return a word in the fake language as a String 3692 */ 3693 public String word(long seed, boolean capitalize) { 3694 srng.setState(seed); 3695 return word(srng, capitalize); 3696 } 3697 3698 /** 3699 * Generate a word from this FakeLanguageGen using the specified RNG. 
3700 * 3701 * @param rng the RNG to use for the randomized string building 3702 * @param capitalize true if the word should start with a capital letter, false otherwise 3703 * @return a word in the fake language as a String 3704 */ 3705 public String word(IRNG rng, boolean capitalize) { 3706 while (true) { 3707 sb.setLength(0); 3708 ender.setLength(0); 3709 3710 double syllableChance = rng.nextDouble(totalSyllableFrequency); 3711 int syllables = 1, i = 0; 3712 for (int s = 0; s < syllableFrequencies.length; s++) { 3713 if(syllableChance < syllableFrequencies[s]) 3714 { 3715 syllables = s + 1; 3716 break; 3717 } else 3718 { 3719 syllableChance -= syllableFrequencies[s]; 3720 } 3721 } 3722 if (rng.nextDouble() < vowelStartFrequency) { 3723 sb.append(rng.getRandomElement(openingVowels)); 3724 if (syllables == 1) 3725 sb.append(rng.getRandomElement(closingConsonants)); 3726 else 3727 sb.append(rng.getRandomElement(midConsonants)); 3728 i++; 3729 } else { 3730 sb.append(rng.getRandomElement(openingConsonants)); 3731 } 3732 String close = ""; 3733 boolean redouble = false; 3734 if (i < syllables) { 3735 if (rng.nextDouble() < syllableEndFrequency) { 3736 close = rng.getRandomElement(closingSyllables); 3737 if (close.contains("@") && (syllables & 1) == 0) { 3738 redouble = true; 3739 syllables >>= 1; 3740 3741 //sb.append(close.replaceAll("@\\d", sb.toString())); 3742 } 3743 if (!close.contains("@")) 3744 ender.append(close); 3745 else if (rng.nextDouble() < vowelEndFrequency) { 3746 ender.append(rng.getRandomElement(midVowels)); 3747 if (rng.nextDouble() < vowelSplitFrequency) { 3748 ender.append(rng.getRandomElement(vowelSplitters)) 3749 .append(rng.getRandomElement(midVowels)); 3750 } 3751 } 3752 } else { 3753 ender.append(rng.getRandomElement(midVowels)); 3754 if (rng.nextDouble() < vowelSplitFrequency) { 3755 ender.append(rng.getRandomElement(vowelSplitters)) 3756 .append(rng.getRandomElement(midVowels)); 3757 } 3758 if (rng.nextDouble() >= vowelEndFrequency) { 3759 
ender.append(rng.getRandomElement(closingConsonants)); 3760 if (rng.nextDouble() < syllableEndFrequency) { 3761 close = rng.getRandomElement(closingSyllables); 3762 if (close.contains("@") && (syllables & 1) == 0) { 3763 redouble = true; 3764 syllables >>= 1; 3765 3766 //sb.append(close.replaceAll("@\\d", sb.toString())); 3767 } 3768 if (!close.contains("@")) 3769 ender.append(close); 3770 } 3771 } 3772 } 3773 i += vowelClusters.matcher(ender).findAll().count(); 3774 3775 } 3776 3777 for (; i < syllables; i++) { 3778 sb.append(rng.getRandomElement(midVowels)); 3779 if (rng.nextDouble() < vowelSplitFrequency) { 3780 sb.append(rng.getRandomElement(vowelSplitters)) 3781 .append(rng.getRandomElement(midVowels)); 3782 } 3783 sb.append(rng.getRandomElement(midConsonants)); 3784 } 3785 3786 sb.append(ender); 3787 if (redouble && i <= syllables + 1) { 3788 sb.append(close.replaceAll("@", sb.toString())); 3789 } 3790 3791 if (sanityChecks != null && !checkAll(sb, sanityChecks)) 3792 { 3793 continue; 3794 } 3795 3796 for (int m = 0; m < modifiers.size(); m++) { 3797 modifiers.get(m).modify(rng, sb); 3798 } 3799 3800 if (capitalize) 3801 sb.setCharAt(0, Character.toUpperCase(sb.charAt(0))); 3802 3803 if (clean && !checkAll(sb, vulgarChecks)) 3804 { 3805 continue; 3806 } 3807 return sb.toString(); 3808 } 3809 } 3810 3811 /** 3812 * Generate a word from this FakeLanguageGen with an approximate number of syllables using the specified long seed 3813 * to use for a shared StatefulRNG. 3814 * If seed and the other parameters are the same, a FakeLanguageGen should produce the same word every time with 3815 * this method. 
*
     * @param seed            the seed, as a long, to use for the randomized string building
     * @param capitalize      true if the word should start with a capital letter, false otherwise
     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
     * @return a word in the fake language as a String
     */
    public String word(long seed, boolean capitalize, int approxSyllables) {
        // Overwrites the state of the shared static srng, so any other code drawing from srng is affected too.
        srng.setState(seed);
        return word(srng, capitalize, approxSyllables);
    }

    /**
     * Generate a word from this FakeLanguageGen using the specified RNG with an approximate number of syllables.
     * This simply calls {@link #word(IRNG, boolean, int, Pattern[])} with {@code null} for the additional checks.
     *
     * @param rng             the RNG to use for the randomized string building
     * @param capitalize      true if the word should start with a capital letter, false otherwise
     * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables
     * @return a word in the fake language as a String
     */
    public String word(IRNG rng, boolean capitalize, int approxSyllables) {
        return word(rng, capitalize, approxSyllables, null);
    }
    /**
     * Generate a word from this FakeLanguageGen with an approximate number of syllables using the specified long seed
     * to use for a shared StatefulRNG. This takes an array of {@link Pattern} objects (from RegExodus, not
     * java.util.regex) that should match invalid outputs, such as words that shouldn't be generated in some context due
     * to vulgarity or cultural matters. If seed and the other parameters are the same, a FakeLanguageGen should produce
     * the same word every time with this method.
*
     * @param seed             the seed, as a long, to use for the randomized string building
     * @param capitalize       true if the word should start with a capital letter, false otherwise
     * @param approxSyllables  the approximate number of syllables to produce in the word; there may be more syllables
     * @param additionalChecks an array of RegExodus Pattern objects that match invalid words (these may be additional
     *                         vulgarity checks, for example); passed through unchanged to the RNG-based overload
     * @return a word in the fake language as a String
     */
    public String word(long seed, boolean capitalize, int approxSyllables, Pattern[] additionalChecks) {
        // Overwrites the state of the shared static srng, so any other code drawing from srng is affected too.
        srng.setState(seed);
        return word(srng, capitalize, approxSyllables, additionalChecks);
    }

    /**
     * Generate a word from this FakeLanguageGen using the specified RNG with an approximate number of syllables.
     * This takes an array of {@link Pattern} objects (from RegExodus, not java.util.regex) that should match invalid
     * outputs, such as words that shouldn't be generated in some context due to vulgarity or cultural matters.
3860 * 3861 * @param rng the RNG to use for the randomized string building 3862 * @param capitalize true if the word should start with a capital letter, false otherwise 3863 * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables 3864 * @param additionalChecks an array of RegExodus Pattern objects that match invalid words (these may be additional vulgarity checks, for example) 3865 * @return a word in the fake language as a String 3866 */ 3867 public String word(IRNG rng, boolean capitalize, int approxSyllables, Pattern[] additionalChecks) { 3868 if (approxSyllables <= 0) { 3869 sb.setLength(0); 3870 sb.append(rng.getRandomElement(openingVowels)); 3871 for (int m = 0; m < modifiers.size(); m++) { 3872 modifiers.get(m).modify(rng, sb); 3873 } 3874 if (capitalize) sb.setCharAt(0, Character.toUpperCase(sb.charAt(0))); 3875 return sb.toString(); 3876 } 3877 while (true) { 3878 sb.setLength(0); 3879 ender.setLength(0); 3880 int i = 0; 3881 if (rng.nextDouble() < vowelStartFrequency) { 3882 sb.append(rng.getRandomElement(openingVowels)); 3883 if (approxSyllables == 1 && closingConsonants.length > 0) 3884 sb.append(rng.getRandomElement(closingConsonants)); 3885 else if (midConsonants.length > 0) 3886 sb.append(rng.getRandomElement(midConsonants)); 3887 i++; 3888 } else if (openingConsonants.length > 0) { 3889 sb.append(rng.getRandomElement(openingConsonants)); 3890 } 3891 String close = ""; 3892 boolean redouble = false; 3893 if (i < approxSyllables) { 3894 if (closingSyllables.length > 0 && rng.nextDouble() < syllableEndFrequency) { 3895 close = rng.getRandomElement(closingSyllables); 3896 if (close.contains("@") && (approxSyllables & 1) == 0) { 3897 redouble = true; 3898 approxSyllables = approxSyllables >> 1; 3899 3900 //sb.append(close.replaceAll("@\\d", sb.toString())); 3901 } 3902 if (!close.contains("@")) 3903 ender.append(close); 3904 else if (redouble && rng.nextDouble() < vowelEndFrequency) { 3905 
ender.append(rng.getRandomElement(midVowels)); 3906 if (vowelSplitters.length > 0 && rng.nextDouble() < vowelSplitFrequency) { 3907 ender.append(rng.getRandomElement(vowelSplitters)) 3908 .append(rng.getRandomElement(midVowels)); 3909 } 3910 } 3911 } else { 3912 ender.append(rng.getRandomElement(midVowels)); 3913 if (rng.nextDouble() < vowelSplitFrequency) { 3914 ender.append(rng.getRandomElement(vowelSplitters)) 3915 .append(rng.getRandomElement(midVowels)); 3916 } 3917 if (rng.nextDouble() >= vowelEndFrequency) { 3918 ender.append(rng.getRandomElement(closingConsonants)); 3919 if (rng.nextDouble() < syllableEndFrequency) { 3920 close = rng.getRandomElement(closingSyllables); 3921 if (close.contains("@") && (approxSyllables & 1) == 0) { 3922 redouble = true; 3923 approxSyllables = approxSyllables >> 1; 3924 3925 //sb.append(close.replaceAll("@\\d", sb.toString())); 3926 } 3927 if (!close.contains("@")) 3928 ender.append(close); 3929 } 3930 } 3931 } 3932 i += vowelClusters.matcher(ender).findAll().count(); 3933 } 3934 3935 for (; i < approxSyllables; i++) { 3936 sb.append(rng.getRandomElement(midVowels)); 3937 if (rng.nextDouble() < vowelSplitFrequency) { 3938 sb.append(rng.getRandomElement(vowelSplitters)) 3939 .append(rng.getRandomElement(midVowels)); 3940 } 3941 sb.append(rng.getRandomElement(midConsonants)); 3942 } 3943 3944 sb.append(ender); 3945 if (redouble && i <= approxSyllables + 1) { 3946 sb.append(close.replaceAll("@", sb.toString())); 3947 } 3948 3949 if (sanityChecks != null && !checkAll(sb, sanityChecks)) 3950 continue; 3951 3952 for (int m = 0; m < modifiers.size(); m++) { 3953 modifiers.get(m).modify(rng, sb); 3954 } 3955 3956 if (clean && !checkAll(sb, vulgarChecks)) 3957 continue; 3958 3959 if (additionalChecks != null && !checkAll(sb, additionalChecks)) 3960 continue; 3961 3962 if (capitalize) 3963 sb.setCharAt(0, Character.toUpperCase(sb.charAt(0))); 3964 3965 return sb.toString(); 3966 } 3967 } 3968 3969 /** 3970 * Generate a word from this 
FakeLanguageGen using the specified StatefulRNG with an approximate number of 3971 * syllables, potentially setting the state of rng mid-way through the word to another seed from {@code reseeds} 3972 * more than once if the word is long enough. This overload is less likely to be used very often. 3973 * 3974 * @param rng the StatefulRNG to use for the randomized string building 3975 * @param capitalize true if the word should start with a capital letter, false otherwise 3976 * @param approxSyllables the approximate number of syllables to produce in the word; there may be more syllables 3977 * @param reseeds an array or varargs of additional long seeds to seed {@code rng} with mid-generation 3978 * @return a word in the fake language as a String 3979 */ 3980 public String word(IStatefulRNG rng, boolean capitalize, int approxSyllables, long... reseeds) { 3981 if (approxSyllables <= 0) { 3982 sb.setLength(0); 3983 sb.append(rng.getRandomElement(openingVowels)); 3984 for (int m = 0; m < modifiers.size(); m++) { 3985 modifiers.get(m).modify(rng, sb); 3986 } 3987 if (capitalize) sb.setCharAt(0, Character.toUpperCase(sb.charAt(0))); 3988 return sb.toString(); 3989 } 3990 int numSeeds, fraction = 1; 3991 if (reseeds != null) 3992 numSeeds = Math.min(reseeds.length, approxSyllables - 1); 3993 else numSeeds = 0; 3994 while (true) { 3995 sb.setLength(0); 3996 ender.setLength(0); 3997 int i = 0; 3998 if (rng.nextDouble() < vowelStartFrequency) { 3999 sb.append(rng.getRandomElement(openingVowels)); 4000 if (approxSyllables == 1) 4001 sb.append(rng.getRandomElement(closingConsonants)); 4002 else 4003 sb.append(rng.getRandomElement(midConsonants)); 4004 i++; 4005 } else { 4006 sb.append(rng.getRandomElement(openingConsonants)); 4007 } 4008 String close = ""; 4009 boolean redouble = false; 4010 if (i < approxSyllables) { 4011 if (numSeeds > 0 && i > 0 && i == approxSyllables * fraction / (1 + numSeeds)) 4012 rng.setState(reseeds[fraction++ - 1]); 4013 if (rng.nextDouble() < 
syllableEndFrequency) { 4014 close = rng.getRandomElement(closingSyllables); 4015 if (close.contains("@") && (approxSyllables & 1) == 0) { 4016 redouble = true; 4017 approxSyllables = approxSyllables >> 1; 4018 } 4019 if (!close.contains("@")) 4020 ender.append(close); 4021 else if (rng.nextDouble() < vowelEndFrequency) { 4022 ender.append(rng.getRandomElement(midVowels)); 4023 if (rng.nextDouble() < vowelSplitFrequency) { 4024 ender.append(rng.getRandomElement(vowelSplitters)) 4025 .append(rng.getRandomElement(midVowels)); 4026 } 4027 } 4028 } else { 4029 ender.append(rng.getRandomElement(midVowels)); 4030 if (rng.nextDouble() < vowelSplitFrequency) { 4031 ender.append(rng.getRandomElement(vowelSplitters)) 4032 .append(rng.getRandomElement(midVowels)); 4033 } 4034 if (rng.nextDouble() >= vowelEndFrequency) { 4035 ender.append(rng.getRandomElement(closingConsonants)); 4036 if (rng.nextDouble() < syllableEndFrequency) { 4037 close = rng.getRandomElement(closingSyllables); 4038 if (close.contains("@") && (approxSyllables & 1) == 0) { 4039 redouble = true; 4040 approxSyllables = approxSyllables >> 1; 4041 4042 //sb.append(close.replaceAll("@\\d", sb.toString())); 4043 } 4044 if (!close.contains("@")) 4045 ender.append(close); 4046 } 4047 } 4048 } 4049 i += vowelClusters.matcher(ender).findAll().count(); 4050 } 4051 4052 for (; i < approxSyllables; i++) { 4053 if (numSeeds > 0 && i > 0 && i == approxSyllables * fraction / (1 + numSeeds)) 4054 rng.setState(reseeds[fraction++ - 1]); 4055 sb.append(rng.getRandomElement(midVowels)); 4056 if (rng.nextDouble() < vowelSplitFrequency) { 4057 sb.append(rng.getRandomElement(vowelSplitters)) 4058 .append(rng.getRandomElement(midVowels)); 4059 } 4060 sb.append(rng.getRandomElement(midConsonants)); 4061 } 4062 4063 sb.append(ender); 4064 if (redouble && i <= approxSyllables + 1) { 4065 sb.append(close.replaceAll("@", sb.toString())); 4066 } 4067 4068 if (sanityChecks != null && !checkAll(sb, sanityChecks)) 4069 continue; 4070 4071 
for (int m = 0; m < modifiers.size(); m++) { 4072 modifiers.get(m).modify(rng, sb); 4073 } 4074 4075 if (capitalize) 4076 sb.setCharAt(0, Character.toUpperCase(sb.charAt(0))); 4077 4078 if (clean && !checkAll(sb, vulgarChecks)) 4079 continue; 4080 return sb.toString(); 4081 } 4082 } 4083 4084 private static final String[] mid = {",", ",", ",", ";"}, end = {".", ".", ".", "!", "?", "..."}; 4085 4086 /** 4087 * Generate a sentence from this FakeLanguageGen, using and changing the current seed, with the length in words 4088 * between minWords and maxWords, both inclusive. This can use commas and semicolons between words, and can end a 4089 * sentence with ".", "!", "?", or "...". 4090 * 4091 * @param minWords an int for the minimum number of words in a sentence; should be at least 1 4092 * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords 4093 * @return a sentence in the fake language as a String 4094 */ 4095 public String sentence(int minWords, int maxWords) { 4096 return sentence(srng, minWords, maxWords, mid, end, 0.2); 4097 } 4098 4099 /** 4100 * Generate a sentence from this FakeLanguageGen, using the given seed as a long, with the length in words between 4101 * minWords and maxWords, both inclusive. This can use commas and semicolons between words, and can end a 4102 * sentence with ".", "!", "?", or "...". 
*
     * @param seed     the seed, as a long, for the randomized string building
     * @param minWords an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @return a sentence in the fake language as a String
     */
    public String sentence(long seed, int minWords, int maxWords) {
        // Overwrites the state of the shared static srng, so any other code drawing from srng is affected too.
        srng.setState(seed);
        return sentence(srng, minWords, maxWords);
    }

    /**
     * Generate a sentence from this FakeLanguageGen, using the given RNG, with the length in words between minWords and
     * maxWords, both inclusive. This can use commas and semicolons between words, and can end a
     * sentence with ".", "!", "?", or "...". Mid-sentence punctuation is attempted with a frequency of 0.2.
     *
     * @param rng      the RNG to use for the randomized string building
     * @param minWords an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @return a sentence in the fake language as a String
     */
    public String sentence(IRNG rng, int minWords, int maxWords) {
        // mid and end are the class's default punctuation pools.
        return sentence(rng, minWords, maxWords, mid,
                end, 0.2);
    }

    /**
     * Generate a sentence from this FakeLanguageGen, using and changing the current seed. The sentence's length in
     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
*
     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
     *                                space in the middle of a sentence
     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
     *                                the very end of a sentence
     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
     *                                midPunctuation should be inserted before spaces
     * @return a sentence in the fake language as a String
     */
    public String sentence(int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
                           double midPunctuationFrequency) {
        // Draws from (and therefore advances) the shared static srng.
        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency);
    }
    /**
     * Generate a sentence from this FakeLanguageGen, using the given seed as a long. The sentence's length in
     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
*
     * @param seed                    the seed, as a long, for the randomized string building
     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
     *                                space in the middle of a sentence
     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
     *                                the very end of a sentence
     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
     *                                midPunctuation should be inserted before spaces
     * @return a sentence in the fake language as a String
     */
    public String sentence(long seed, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
                           double midPunctuationFrequency) {
        // Overwrites the state of the shared static srng, so any other code drawing from srng is affected too.
        srng.setState(seed);
        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency);
    }

    /**
     * Generate a sentence from this FakeLanguageGen using the specific RNG. The sentence's length in
     * words will be between minWords and maxWords, both inclusive. It will put one of the punctuation Strings from
     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
4177 * 4178 * @param rng the RNG to use for the randomized string building 4179 * @param minWords an int for the minimum number of words in a sentence; should be at least 1 4180 * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords 4181 * @param midPunctuation a String array where each element is a comma, semicolon, or the like that goes before a 4182 * space in the middle of a sentence 4183 * @param endPunctuation a String array where each element is a period, question mark, or the like that goes at 4184 * the very end of a sentence 4185 * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from 4186 * midPunctuation should be inserted before spaces 4187 * @return a sentence in the fake language as a String 4188 */ 4189 public String sentence(IRNG rng, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation, 4190 double midPunctuationFrequency) { 4191 if (minWords < 1) 4192 minWords = 1; 4193 if (minWords > maxWords) 4194 maxWords = minWords; 4195 if (midPunctuationFrequency > 1.0) { 4196 midPunctuationFrequency = 1.0 / midPunctuationFrequency; 4197 } 4198 ssb.setLength(0); 4199 ssb.ensureCapacity(12 * maxWords); 4200 ssb.append(word(rng, true)); 4201 for (int i = 1; i < minWords; i++) { 4202 if (rng.nextDouble() < midPunctuationFrequency) { 4203 ssb.append(rng.getRandomElement(midPunctuation)); 4204 } 4205 ssb.append(' ').append(word(rng, false)); 4206 } 4207 for (int i = minWords; i < maxWords && rng.nextInt(2 * maxWords) > i; i++) { 4208 if (rng.nextDouble() < midPunctuationFrequency) { 4209 ssb.append(rng.getRandomElement(midPunctuation)); 4210 } 4211 ssb.append(' ').append(word(rng, false)); 4212 } 4213 if (endPunctuation != null && endPunctuation.length > 0) 4214 ssb.append(rng.getRandomElement(endPunctuation)); 4215 return ssb.toString(); 4216 } 4217 4218 /** 4219 * Generate a sentence from this FakeLanguageGen that fits in the given length 
limit. The sentence's length in
     * words will be between minWords and maxWords, both inclusive, unless it would exceed maxChars, in which case it is
     * truncated. It will put one of the punctuation Strings from {@code midPunctuation} between two words (before the
     * space) at a frequency of {@code midPunctuationFrequency} (between 0 and 1), and will end the sentence with one
     * String chosen from {@code endPunctuation}.
     *
     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
     *                                space in the middle of a sentence
     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
     *                                the very end of a sentence
     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
     *                                midPunctuation should be inserted before spaces
     * @param maxChars                the longest string length this can produce; should be at least {@code 6 * minWords}
     * @return a sentence in the fake language as a String
     */
    public String sentence(int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
                           double midPunctuationFrequency, int maxChars) {
        // Draws from (and therefore advances) the shared static srng.
        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency, maxChars);
    }

    /**
     * Generate a sentence from this FakeLanguageGen that fits in the given length limit, using the given seed as a
     * long. The sentence's length in words will be between minWords and maxWords, both inclusive, unless it would
     * exceed maxChars, in which case it is truncated.
It will put one of the punctuation Strings from
     * {@code midPunctuation} between two words (before the space) at a frequency of {@code midPunctuationFrequency}
     * (between 0 and 1), and will end the sentence with one String chosen from {@code endPunctuation}.
     *
     * @param seed                    the seed, as a long, for the randomized string building
     * @param minWords                an int for the minimum number of words in a sentence; should be at least 1
     * @param maxWords                an int for the maximum number of words in a sentence; should be at least equal to minWords
     * @param midPunctuation          a String array where each element is a comma, semicolon, or the like that goes before a
     *                                space in the middle of a sentence
     * @param endPunctuation          a String array where each element is a period, question mark, or the like that goes at
     *                                the very end of a sentence
     * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from
     *                                midPunctuation should be inserted before spaces
     * @param maxChars                the longest string length this can produce; should be at least {@code 6 * minWords}
     * @return a sentence in the fake language as a String
     */
    public String sentence(long seed, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation,
                           double midPunctuationFrequency, int maxChars) {
        // Overwrites the state of the shared static srng, so any other code drawing from srng is affected too.
        srng.setState(seed);
        return sentence(srng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency, maxChars);
    }

    /**
     * Generate a sentence from this FakeLanguageGen using the given RNG that fits in the given length limit. The
     * sentence's length in words will be between minWords and maxWords, both inclusive, unless it would exceed
     * maxChars, in which case it is truncated.
It will put one of the punctuation Strings from {@code midPunctuation} 4270 * between two words (before the space) at a frequency of {@code midPunctuationFrequency} (between 0 and 1), and 4271 * will end the sentence with one String chosen from {@code endPunctuation}. 4272 * 4273 * @param rng the RNG to use for the randomized string building 4274 * @param minWords an int for the minimum number of words in a sentence; should be at least 1 4275 * @param maxWords an int for the maximum number of words in a sentence; should be at least equal to minWords 4276 * @param midPunctuation a String array where each element is a comma, semicolon, or the like that goes before a 4277 * space in the middle of a sentence 4278 * @param endPunctuation a String array where each element is a period, question mark, or the like that goes at 4279 * the very end of a sentence 4280 * @param midPunctuationFrequency a double between 0.0 and 1.0 that determines how often Strings from 4281 * midPunctuation should be inserted before spaces 4282 * @param maxChars the longest string length this can produce; should be at least {@code 6 * minWords} 4283 * @return a sentence in the fake language as a String 4284 */ 4285 public String sentence(IRNG rng, int minWords, int maxWords, String[] midPunctuation, String[] endPunctuation, 4286 double midPunctuationFrequency, int maxChars) { 4287 if(maxChars < 0) 4288 return sentence(rng, minWords, maxWords, midPunctuation, endPunctuation, midPunctuationFrequency); 4289 if (minWords < 1) 4290 minWords = 1; 4291 if (minWords > maxWords) 4292 maxWords = minWords; 4293 if (midPunctuationFrequency > 1.0) { 4294 midPunctuationFrequency = 1.0 / midPunctuationFrequency; 4295 } 4296 if (maxChars < 4) 4297 return "!"; 4298 if (maxChars <= 5 * minWords) { 4299 minWords = 1; 4300 maxWords = 1; 4301 } 4302 int frustration = 0; 4303 ssb.setLength(0); 4304 ssb.ensureCapacity(maxChars); 4305 String next = word(rng, true); 4306 while (next.length() >= maxChars - 1 && 
frustration < 50) { 4307 next = word(rng, true); 4308 frustration++; 4309 } 4310 if (frustration >= 50) return "!"; 4311 ssb.append(next); 4312 for (int i = 1; i < minWords && ssb.length() < maxChars - 7; i++) { 4313 if (rng.nextDouble() < midPunctuationFrequency && ssb.length() < maxChars - 3) { 4314 ssb.append(rng.getRandomElement(midPunctuation)); 4315 } 4316 next = word(rng, false); 4317 while (ssb.length() + next.length() >= maxChars - 2 && frustration < 50) { 4318 next = word(rng, false); 4319 frustration++; 4320 } 4321 if (frustration >= 50) break; 4322 ssb.append(' ').append(next); 4323 } 4324 for (int i = minWords; i < maxWords && ssb.length() < maxChars - 7 && rng.nextInt(2 * maxWords) > i && frustration < 50; i++) { 4325 if (rng.nextDouble() < midPunctuationFrequency && ssb.length() < maxChars - 3) { 4326 ssb.append(rng.getRandomElement(midPunctuation)); 4327 } 4328 next = word(rng, false); 4329 while (ssb.length() + next.length() >= maxChars - 2 && frustration < 50) { 4330 next = word(rng, false); 4331 frustration++; 4332 } 4333 if (frustration >= 50) break; 4334 ssb.append(' '); 4335 ssb.append(next); 4336 } 4337 4338 if (endPunctuation != null && endPunctuation.length > 0) { 4339 4340 next = rng.getRandomElement(endPunctuation); 4341 if (ssb.length() + next.length() >= maxChars) 4342 ssb.append('.'); 4343 else 4344 ssb.append(next); 4345 } 4346 4347 if (ssb.length() > maxChars) 4348 return "!"; 4349 return ssb.toString(); 4350 } 4351 4352 protected String[] merge1000(IRNG rng, String[] me, String[] other, double otherInfluence) { 4353 if (other.length <= 0 && me.length <= 0) 4354 return new String[]{}; 4355 String[] ret = new String[1000]; 4356 int otherCount = (int) (1000 * otherInfluence); 4357 int idx = 0; 4358 if (other.length > 0) { 4359 String[] tmp = new String[other.length]; 4360 rng.shuffle(other, tmp); 4361 for (idx = 0; idx < otherCount; idx++) { 4362 ret[idx] = tmp[idx % tmp.length]; 4363 } 4364 } 4365 if (me.length > 0) { 4366 String[] 
tmp = new String[me.length]; 4367 rng.shuffle(me, tmp); 4368 for (; idx < 1000; idx++) { 4369 ret[idx] = tmp[idx % tmp.length]; 4370 } 4371 } else { 4372 for (; idx < 1000; idx++) { 4373 ret[idx] = other[idx % other.length]; 4374 } 4375 } 4376 return ret; 4377 } 4378 4379 4380 protected String[] accentVowels(IRNG rng, String[] me, double influence) { 4381 String[] ret = new String[1000]; 4382 int otherCount = (int) (1000 * influence); 4383 int idx; 4384 Matcher matcher; 4385 if (me.length > 0) { 4386 String[] tmp = new String[me.length]; 4387 rng.shuffle(me, tmp); 4388 for (idx = 0; idx < otherCount; idx++) { 4389 ret[idx] = tmp[idx % tmp.length] 4390 .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)]) 4391 .replace('e', accentedVowels[1][rng.nextInt(accentedVowels[1].length)]) 4392 .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)]) 4393 .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)]) 4394 .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)]); 4395 matcher = repeats.matcher(ret[idx]); 4396 if (matcher.find()) { 4397 ret[idx] = matcher.replaceAll(rng.getRandomElement(me)); 4398 } 4399 } 4400 for (; idx < 1000; idx++) { 4401 ret[idx] = tmp[idx % tmp.length]; 4402 } 4403 } else 4404 return new String[]{}; 4405 return ret; 4406 } 4407 4408 protected String[] accentConsonants(IRNG rng, String[] me, double influence) { 4409 String[] ret = new String[1000]; 4410 int otherCount = (int) (1000 * influence); 4411 int idx; 4412 Matcher matcher; 4413 if (me.length > 0) { 4414 String[] tmp = new String[me.length]; 4415 rng.shuffle(me, tmp); 4416 for (idx = 0; idx < otherCount; idx++) { 4417 ret[idx] = tmp[idx % tmp.length] 4418 //0 4419 .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)]) 4420 .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)]) 4421 .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)]) 4422 
.replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)]) 4423 .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)]) 4424 .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)]) 4425 .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)]) 4426 .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)]) 4427 //9 4428 .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)]) 4429 //11 4430 //12 4431 .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)]) 4432 .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)]) 4433 .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)]) 4434 //16 4435 .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)]) 4436 //18 4437 .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)]) 4438 .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]); 4439 4440 matcher = repeats.matcher(ret[idx]); 4441 if (matcher.find()) { 4442 ret[idx] = matcher.replaceAll(rng.getRandomElement(me)); 4443 } 4444 } 4445 for (; idx < 1000; idx++) { 4446 ret[idx] = tmp[idx % tmp.length]; 4447 } 4448 } else 4449 return new String[]{}; 4450 return ret; 4451 } 4452 4453 protected String[] accentBoth(IRNG rng, String[] me, double vowelInfluence, double consonantInfluence) { 4454 String[] ret = new String[1000]; 4455 int idx; 4456 Matcher matcher; 4457 if (me.length > 0) { 4458 String[] tmp = new String[me.length]; 4459 rng.shuffle(me, tmp); 4460 for (idx = 0; idx < 1000; idx++) { 4461 boolean subVowel = rng.nextDouble() < vowelInfluence, subCon = rng.nextDouble() < consonantInfluence; 4462 if (subVowel && subCon) { 4463 ret[idx] = tmp[idx % tmp.length] 4464 .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)]) 4465 .replace('e', 
accentedVowels[1][rng.nextInt(accentedVowels[1].length)]) 4466 .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)]) 4467 .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)]) 4468 .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)]) 4469 4470 //0 4471 .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)]) 4472 .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)]) 4473 .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)]) 4474 .replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)]) 4475 .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)]) 4476 .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)]) 4477 .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)]) 4478 .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)]) 4479 //9 4480 .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)]) 4481 //11 4482 //12 4483 .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)]) 4484 .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)]) 4485 .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)]) 4486 //16 4487 .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)]) 4488 //18 4489 .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)]) 4490 .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]); 4491 4492 matcher = repeats.matcher(ret[idx]); 4493 if (matcher.find()) { 4494 ret[idx] = matcher.replaceAll(rng.getRandomElement(me)); 4495 } 4496 } else if (subVowel) { 4497 ret[idx] = tmp[idx % tmp.length] 4498 .replace('a', accentedVowels[0][rng.nextInt(accentedVowels[0].length)]) 4499 .replace('e', 
accentedVowels[1][rng.nextInt(accentedVowels[1].length)]) 4500 .replace('i', accentedVowels[2][rng.nextInt(accentedVowels[2].length)]) 4501 .replace('o', accentedVowels[3][rng.nextInt(accentedVowels[3].length)]) 4502 .replace('u', accentedVowels[4][rng.nextInt(accentedVowels[4].length)]); 4503 4504 matcher = repeats.matcher(ret[idx]); 4505 if (matcher.find()) { 4506 ret[idx] = matcher.replaceAll(rng.getRandomElement(me)); 4507 } 4508 } else if (subCon) { 4509 ret[idx] = tmp[idx % tmp.length] 4510 //0 4511 .replace('c', accentedConsonants[1][rng.nextInt(accentedConsonants[1].length)]) 4512 .replace('d', accentedConsonants[2][rng.nextInt(accentedConsonants[2].length)]) 4513 .replace('f', accentedConsonants[3][rng.nextInt(accentedConsonants[3].length)]) 4514 .replace('g', accentedConsonants[4][rng.nextInt(accentedConsonants[4].length)]) 4515 .replace('h', accentedConsonants[5][rng.nextInt(accentedConsonants[5].length)]) 4516 .replace('j', accentedConsonants[6][rng.nextInt(accentedConsonants[6].length)]) 4517 .replace('k', accentedConsonants[7][rng.nextInt(accentedConsonants[7].length)]) 4518 .replace('l', accentedConsonants[8][rng.nextInt(accentedConsonants[8].length)]) 4519 //9 4520 .replace('n', accentedConsonants[10][rng.nextInt(accentedConsonants[10].length)]) 4521 //11 4522 //12 4523 .replace('r', accentedConsonants[13][rng.nextInt(accentedConsonants[13].length)]) 4524 .replace('s', accentedConsonants[14][rng.nextInt(accentedConsonants[14].length)]) 4525 .replace('t', accentedConsonants[15][rng.nextInt(accentedConsonants[15].length)]) 4526 //16 4527 .replace('w', accentedConsonants[17][rng.nextInt(accentedConsonants[17].length)]) 4528 //18 4529 .replace('y', accentedConsonants[19][rng.nextInt(accentedConsonants[19].length)]) 4530 .replace('z', accentedConsonants[20][rng.nextInt(accentedConsonants[20].length)]); 4531 4532 matcher = repeats.matcher(ret[idx]); 4533 if (matcher.find()) { 4534 ret[idx] = matcher.replaceAll(rng.getRandomElement(me)); 4535 } 4536 } else 
ret[idx] = tmp[idx % tmp.length]; 4537 4538 } 4539 } else 4540 return new String[]{}; 4541 return ret; 4542 } 4543 4544 /** 4545 * Makes a new FakeLanguageGen that mixes this object with {@code other}, mingling the consonants and vowels they 4546 * use as well as any word suffixes or other traits, and favoring the qualities in {@code other} by 4547 * {@code otherInfluence}, which will value both languages evenly if it is 0.5 . 4548 * <br> 4549 * You should generally prefer {@link #mix(double, FakeLanguageGen, double, Object...)} or 4550 * {@link #mixAll(Object...)} if you ever mix 3 or more languages. Chaining this mix() method can be very 4551 * counter-intuitive because the weights are relative, while in the other mix() and mixAll() they are absolute. 4552 * @param other another FakeLanguageGen to mix along with this one into a new language 4553 * @param otherInfluence how much other should affect the pair, with 0.5 being equal and 1.0 being only other used 4554 * @return a new FakeLanguageGen with traits from both languages 4555 */ 4556 public FakeLanguageGen mix(FakeLanguageGen other, double otherInfluence) { 4557 otherInfluence = Math.max(0.0, Math.min(otherInfluence, 1.0)); 4558 double myInfluence = 1.0 - otherInfluence; 4559 4560 GWTRNG rng = new GWTRNG(hashCode(), other.hashCode() ^ NumberTools.doubleToMixedIntBits(otherInfluence)); 4561 4562 String[] ov = merge1000(rng, openingVowels, other.openingVowels, otherInfluence), 4563 mv = merge1000(rng, midVowels, other.midVowels, otherInfluence), 4564 oc = merge1000(rng, openingConsonants, other.openingConsonants, otherInfluence * 4565 Math.max(0.0, Math.min(1.0, 1.0 - other.vowelStartFrequency + vowelStartFrequency))), 4566 mc = merge1000(rng, midConsonants, other.midConsonants, otherInfluence), 4567 cc = merge1000(rng, closingConsonants, other.closingConsonants, otherInfluence * 4568 Math.max(0.0, Math.min(1.0, 1.0 - other.vowelEndFrequency + vowelEndFrequency))), 4569 cs = merge1000(rng, closingSyllables, 
other.closingSyllables, otherInfluence * 4570 Math.max(0.0, Math.min(1.0, other.syllableEndFrequency - syllableEndFrequency))), 4571 splitters = merge1000(rng, vowelSplitters, other.vowelSplitters, otherInfluence); 4572 4573 double[] fr = new double[Math.max(syllableFrequencies.length, other.syllableFrequencies.length)]; 4574 System.arraycopy(syllableFrequencies, 0, fr, 0, syllableFrequencies.length); 4575 for (int i = 0; i < other.syllableFrequencies.length; i++) { 4576 fr[i] += other.syllableFrequencies[i]; 4577 } 4578 ArrayList<Modifier> mods = new ArrayList<>(modifiers.size() + other.modifiers.size()); 4579 mods.addAll(modifiers); 4580 mods.addAll(other.modifiers); 4581 return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, splitters, fr, 4582 vowelStartFrequency * myInfluence + other.vowelStartFrequency * otherInfluence, 4583 vowelEndFrequency * myInfluence + other.vowelEndFrequency * otherInfluence, 4584 vowelSplitFrequency * myInfluence + other.vowelSplitFrequency * otherInfluence, 4585 syllableEndFrequency * myInfluence + other.syllableEndFrequency * otherInfluence, 4586 (sanityChecks == null) ? other.sanityChecks : sanityChecks, true, mods) 4587 .setName(otherInfluence > 0.5 ? other.name + "/" + name : name + "/" + other.name); 4588 } 4589 4590 private static double readDouble(Object o) { 4591 if (o instanceof Double) return (Double) o; 4592 else if (o instanceof Float) return (Float) o; 4593 else if (o instanceof Long) return ((Long) o).doubleValue(); 4594 else if (o instanceof Integer) return (Integer) o; 4595 else if (o instanceof Short) return (Short) o; 4596 else if (o instanceof Byte) return (Byte) o; 4597 else if (o instanceof Character) return (Character) o; 4598 return 0.0; 4599 } 4600 4601 /** 4602 * Produces a FakeLanguageGen by mixing this FakeLanguageGen with one or more other FakeLanguageGen objects. 
Takes 4603 * a weight for this, another FakeLanguageGen, a weight for that FakeLanguageGen, then a possibly-empty group of 4604 * FakeLanguageGen parameters and the weights for those parameters. If other1 is null or if pairs has been given a 4605 * value of null instead of the normal (possibly empty) array of Objects, then this simply returns a copy of this 4606 * FakeLanguageGen. Otherwise, it will at least mix this language with other1 using the given weights for each. 4607 * If pairs is not empty, it has special requirements for what types it allows and in what order, but does no type 4608 * checking. Specifically, pairs requires the first Object to be a FakeLanguageGen, the next to be a number of some 4609 * kind that will be the weight for the previous FakeLanguageGen(this method can handle non-Double weights, and 4610 * converts them to Double if needed), and every two parameters after that to follow the same order and pattern 4611 * (FakeLanguageGen, then number, then FakeLanguageGen, then number...). Weights are absolute, and don't depend on 4612 * earlier weights, which is the case when chaining the {@link #mix(FakeLanguageGen, double)} method. This makes 4613 * reasoning about the ideal weights for multiple mixed languages easier; to mix 3 languages equally you can use 4614 * 3 equal weights with this, whereas with mix chaining you would need to mix the first two with 0.5 and the third 4615 * with 0.33 . 4616 * <br> 4617 * It's up to you whether you want to use {@link #mixAll(Object...)} or this method; they call the same code and 4618 * produce the same result, including the summary for serialization support. You probably shouldn't use 4619 * {@link #mix(FakeLanguageGen, double)} with two arguments in new code, since it's easy to make mistakes when 4620 * mixing three or more languages (calling that twice or more). 
4621 * 4622 * @param myWeight the weight to assign this FakeLanguageGen in the mix 4623 * @param other1 another FakeLanguageGen to mix in; if null, this method will abort and return {@link #copy()} 4624 * @param weight1 the weight to assign other1 in the mix 4625 * @param pairs may be empty, not null; otherwise must alternate between FakeLanguageGen and number (weight) elements 4626 * @return a FakeLanguageGen produced by mixing this with any FakeLanguageGen arguments by the given weights 4627 */ 4628 public FakeLanguageGen mix(double myWeight, FakeLanguageGen other1, double weight1, Object... pairs) { 4629 if (other1 == null || pairs == null) 4630 return copy(); 4631 OrderedSet<Modifier> mods = new OrderedSet<>(modifiers); 4632 FakeLanguageGen mixer = removeModifiers(); 4633 FakeLanguageGen[] languages = new FakeLanguageGen[2 + (pairs.length >>> 1)]; 4634 double[] weights = new double[languages.length]; 4635 String[] summaries = new String[languages.length]; 4636 boolean summarize = true; 4637 double total = 0.0, current, weight; 4638 languages[0] = mixer; 4639 total += weights[0] = myWeight; 4640 if ((summaries[0] = mixer.summary) == null) summarize = false; 4641 mods.addAll(other1.modifiers); 4642 languages[1] = other1.removeModifiers(); 4643 total += weights[1] = weight1; 4644 if (summarize && (summaries[1] = languages[1].summary) == null) summarize = false; 4645 for (int i = 1, p = 2; i < pairs.length; i += 2, p++) { 4646 if (pairs[i] == null || pairs[i - 1] == null) 4647 continue; 4648 languages[p] = ((FakeLanguageGen) pairs[i - 1]).removeModifiers(); 4649 total += weights[p] = readDouble(pairs[i]); 4650 if (summarize && (summaries[p] = languages[p].summary) == null) summarize = false; 4651 } 4652 if (total == 0) 4653 return copy(); 4654 current = myWeight / total; 4655 for (int i = 1; i < languages.length; i++) { 4656 if ((weight = weights[i]) > 0) 4657 mixer = mixer.mix(languages[i], weight / total / (current += weight / total)); 4658 } 4659 if (summarize) 
{ 4660 sb.setLength(0); 4661 String c; 4662 int idx; 4663 for (int i = 0; i < summaries.length; i++) { 4664 c = summaries[i]; 4665 idx = c.indexOf('@'); 4666 if (idx >= 0) { 4667 sb.append(c, 0, idx + 1).append(weights[i]); 4668 if (i < summaries.length - 1) 4669 sb.append('~'); 4670 } 4671 } 4672 for (int i = 0; i < mods.size(); i++) { 4673 sb.append('℗').append(mods.getAt(i).serializeToString()); 4674 } 4675 return mixer.addModifiers(mods).summarize(sb.toString()); 4676 } else 4677 return mixer.addModifiers(mods); 4678 } 4679 4680 /** 4681 * Produces a FakeLanguageGen from a group of FakeLanguageGen parameters and the weights for those parameters. 4682 * Requires the first Object in pairs to be a FakeLanguageGen, the next to be a number of some kind that will be the 4683 * weight for the previous FakeLanguageGen(this method can handle non-Double weights, and converts them to Double 4684 * if needed), and every two parameters after that to follow the same order and pattern (FakeLanguageGen, then 4685 * number, then FakeLanguageGen, then number...). There should be at least 4 elements in pairs, half of them 4686 * languages and half of them weights, for this to do any mixing, but it can produce a result with as little as one 4687 * FakeLanguageGen (returning a copy of the first FakeLanguageGen). Weights are absolute, and don't depend on 4688 * earlier weights, which is the case when chaining the {@link #mix(FakeLanguageGen, double)} method. This makes 4689 * reasoning about the ideal weights for multiple mixed languages easier; to mix 3 languages equally you can use 4690 * 3 equal weights with this, whereas with mix chaining you would need to mix the first two with 0.5 and the third 4691 * with 0.33 . 
4692 * <br> 4693 * This is probably the most intuitive way to mix languages here, though there's also 4694 * {@link #mix(double, FakeLanguageGen, double, Object...)}, which is very similar but doesn't take its parameters 4695 * in quite the same way (it isn't static, and treats the FakeLanguageGen object like the first item in pairs here). 4696 * Used internally in the deserialization code. 4697 * 4698 * @param pairs should have at least one item, and must alternate between FakeLanguageGen and number (weight) elements 4699 * @return a FakeLanguageGen produced by mixing any FakeLanguageGen arguments by the given weights 4700 */ 4701 public static FakeLanguageGen mixAll(Object... pairs) { 4702 int len; 4703 if (pairs == null || (len = pairs.length) <= 0) 4704 return ENGLISH.copy(); 4705 if (len < 4) 4706 return ((FakeLanguageGen) pairs[0]).copy(); 4707 Object[] pairs2 = new Object[len - 4]; 4708 if (len > 4) 4709 System.arraycopy(pairs, 4, pairs2, 0, len - 4); 4710 return ((FakeLanguageGen) pairs[0]).mix(readDouble(pairs[1]), (FakeLanguageGen) pairs[2], readDouble(pairs[3]), pairs2); 4711 } 4712 4713 /** 4714 * Produces a new FakeLanguageGen like this one but with extra vowels and/or consonants possible, adding from a wide 4715 * selection of accented vowels (if vowelInfluence is above 0.0) and/or consonants (if consonantInfluence is above 4716 * 0.0). This may produce a gibberish-looking language with no rhyme or reason to the accents, and generally 4717 * consonantInfluence should be very low if it is above 0 at all. 
4718 * @param vowelInfluence between 0.0 and 1.0; if 0.0 will not affect vowels at all 4719 * @param consonantInfluence between 0.0 and 1.0; if 0.0 will not affect consonants at all 4720 * @return a new FakeLanguageGen with modifications to add accented vowels and/or consonants 4721 */ 4722 public FakeLanguageGen addAccents(double vowelInfluence, double consonantInfluence) { 4723 vowelInfluence = Math.max(0.0, Math.min(vowelInfluence, 1.0)); 4724 consonantInfluence = Math.max(0.0, Math.min(consonantInfluence, 1.0)); 4725 GWTRNG rng = new GWTRNG(hashCode(), 4726 NumberTools.doubleToMixedIntBits(vowelInfluence) 4727 ^ NumberTools.doubleToMixedIntBits(consonantInfluence)); 4728 String[] ov = accentVowels(rng, openingVowels, vowelInfluence), 4729 mv = accentVowels(rng, midVowels, vowelInfluence), 4730 oc = accentConsonants(rng, openingConsonants, consonantInfluence), 4731 mc = accentConsonants(rng, midConsonants, consonantInfluence), 4732 cc = accentConsonants(rng, closingConsonants, consonantInfluence), 4733 cs = accentBoth(rng, closingSyllables, vowelInfluence, consonantInfluence); 4734 4735 4736 return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, vowelSplitters, syllableFrequencies, 4737 vowelStartFrequency, 4738 vowelEndFrequency, 4739 vowelSplitFrequency, 4740 syllableEndFrequency, sanityChecks, clean, modifiers).setName(name + "-Bònüs"); 4741 } 4742 4743 private static String[] copyStrings(String[] start) { 4744 String[] next = new String[start.length]; 4745 System.arraycopy(start, 0, next, 0, start.length); 4746 return next; 4747 } 4748 4749 /** 4750 * Useful for cases with limited fonts, this produces a new FakeLanguageGen like this one but with all accented 4751 * characters removed (including almost all non-ASCII Latin-alphabet characters, but only some Greek and Cyrillic 4752 * characters). This will replace letters like "A with a ring" with just "A". Some of the letters chosen as 4753 * replacements aren't exact matches. 
4754 * @return a new FakeLanguageGen like this one but without accented letters 4755 */ 4756 public FakeLanguageGen removeAccents() { 4757 4758 String[] ov = copyStrings(openingVowels), 4759 mv = copyStrings(midVowels), 4760 oc = copyStrings(openingConsonants), 4761 mc = copyStrings(midConsonants), 4762 cc = copyStrings(closingConsonants), 4763 cs = copyStrings(closingSyllables); 4764 for (int i = 0; i < ov.length; i++) { 4765 ov[i] = removeAccents(openingVowels[i]).toString(); 4766 } 4767 for (int i = 0; i < mv.length; i++) { 4768 mv[i] = removeAccents(midVowels[i]).toString(); 4769 } 4770 for (int i = 0; i < oc.length; i++) { 4771 oc[i] = removeAccents(openingConsonants[i]).toString(); 4772 } 4773 for (int i = 0; i < mc.length; i++) { 4774 mc[i] = removeAccents(midConsonants[i]).toString(); 4775 } 4776 for (int i = 0; i < cc.length; i++) { 4777 cc[i] = removeAccents(closingConsonants[i]).toString(); 4778 } 4779 for (int i = 0; i < cs.length; i++) { 4780 cs[i] = removeAccents(closingSyllables[i]).toString(); 4781 } 4782 4783 return new FakeLanguageGen(ov, mv, oc, mc, cc, cs, vowelSplitters, syllableFrequencies, 4784 vowelStartFrequency, 4785 vowelEndFrequency, 4786 vowelSplitFrequency, 4787 syllableEndFrequency, sanityChecks, clean, modifiers); 4788 } 4789 4790 /** 4791 * Returns the name of this FakeLanguageGen, such as "English" or "Deep Speech", if one was registered for this. 4792 * In the case of hybrid languages produced by {@link #mix(FakeLanguageGen, double)} or related methods, this should 4793 * produce a String like "English/French" (or "English/French/Maori" if more are mixed together). If no name was 4794 * registered, this will return "Nameless Language". 
4795 * @return the human-readable name of this language, or "Nameless Language" if none is known 4796 */ 4797 public String getName() { 4798 return name; 4799 } 4800 private FakeLanguageGen setName(final String languageName) 4801 { 4802 name = languageName; 4803 return this; 4804 } 4805 4806 /** 4807 * Adds the specified Modifier objects from a Collection to a copy of this FakeLanguageGen and returns it. 4808 * You can obtain a Modifier with the static constants in the FakeLanguageGen.Modifier nested class, the 4809 * FakeLanguageGen.modifier() method, or Modifier's constructor. 4810 * 4811 * @param mods an array or vararg of Modifier objects 4812 * @return a copy of this with the Modifiers added 4813 */ 4814 public FakeLanguageGen addModifiers(Collection<Modifier> mods) { 4815 FakeLanguageGen next = copy(); 4816 next.modifiers.addAll(mods); 4817 return next; 4818 } 4819 4820 /** 4821 * Adds the specified Modifier objects to a copy of this FakeLanguageGen and returns it. 4822 * You can obtain a Modifier with the static constants in the FakeLanguageGen.Modifier nested class, the 4823 * FakeLanguageGen.modifier() method, or Modifier's constructor. 4824 * 4825 * @param mods an array or vararg of Modifier objects 4826 * @return a copy of this with the Modifiers added 4827 */ 4828 public FakeLanguageGen addModifiers(Modifier... mods) { 4829 FakeLanguageGen next = copy(); 4830 Collections.addAll(next.modifiers, mods); 4831 return next; 4832 } 4833 4834 /** 4835 * Creates a copy of this FakeLanguageGen with no modifiers. 4836 * 4837 * @return a copy of this FakeLanguageGen with modifiers removed. 4838 */ 4839 public FakeLanguageGen removeModifiers() { 4840 FakeLanguageGen next = copy(); 4841 next.modifiers.clear(); 4842 return next; 4843 } 4844 4845 /** 4846 * Convenience method that just calls {@link Modifier#Modifier(String, String)}. 
4847 * @param pattern a String that will be interpreted as a regex pattern using {@link Pattern} 4848 * @param replacement a String that will be interpreted as a replacement string for pattern; can include "$1" and the like if pattern has groups 4849 * @return a Modifier that can be applied to a FakeLanguagGen 4850 */ 4851 public static Modifier modifier(String pattern, String replacement) { 4852 return new Modifier(pattern, replacement); 4853 } 4854 /** 4855 * Convenience method that just calls {@link Modifier#Modifier(String, String, double)}. 4856 * @param pattern a String that will be interpreted as a regex pattern using {@link Pattern} 4857 * @param replacement a String that will be interpreted as a replacement string for pattern; can include "$1" and the like if pattern has groups 4858 * @param chance the chance, as a double between 0 and 1, that the Modifier will take effect 4859 * @return a Modifier that can be applied to a FakeLanguagGen 4860 */ 4861 public static Modifier modifier(String pattern, String replacement, double chance) { 4862 return new Modifier(pattern, replacement, chance); 4863 } 4864 4865 @Override 4866 public boolean equals(Object o) { 4867 if (this == o) return true; 4868 if (o == null || getClass() != o.getClass()) return false; 4869 4870 FakeLanguageGen that = (FakeLanguageGen) o; 4871 4872 if (clean != that.clean) return false; 4873 if (Double.compare(that.totalSyllableFrequency, totalSyllableFrequency) != 0) return false; 4874 if (Double.compare(that.vowelStartFrequency, vowelStartFrequency) != 0) return false; 4875 if (Double.compare(that.vowelEndFrequency, vowelEndFrequency) != 0) return false; 4876 if (Double.compare(that.vowelSplitFrequency, vowelSplitFrequency) != 0) return false; 4877 if (Double.compare(that.syllableEndFrequency, syllableEndFrequency) != 0) return false; 4878 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4879 if (!Arrays.equals(openingVowels, that.openingVowels)) return false; 4880 // 
Probably incorrect - comparing Object[] arrays with Arrays.equals 4881 if (!Arrays.equals(midVowels, that.midVowels)) return false; 4882 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4883 if (!Arrays.equals(openingConsonants, that.openingConsonants)) return false; 4884 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4885 if (!Arrays.equals(midConsonants, that.midConsonants)) return false; 4886 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4887 if (!Arrays.equals(closingConsonants, that.closingConsonants)) return false; 4888 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4889 if (!Arrays.equals(vowelSplitters, that.vowelSplitters)) return false; 4890 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4891 if (!Arrays.equals(closingSyllables, that.closingSyllables)) return false; 4892 if (!Arrays.equals(syllableFrequencies, that.syllableFrequencies)) return false; 4893 // Probably incorrect - comparing Object[] arrays with Arrays.equals 4894 if (!Arrays.equals(sanityChecks, that.sanityChecks)) return false; 4895 return modifiers != null ? modifiers.equals(that.modifiers) : that.modifiers == null; 4896 } 4897 4898 @Override 4899 public int hashCode() { 4900 int result = 31 * 31 * 31 * 31 + 4901 31 * 31 * 31 * CrossHash.hash(openingVowels) + 4902 31 * 31 * CrossHash.hash(midVowels) + 4903 31 * CrossHash.hash(openingConsonants) + 4904 CrossHash.hash(midConsonants) | 0; 4905 result = 31 * 31 * 31 * 31 * result + 4906 31 * 31 * 31 * CrossHash.hash(closingConsonants) + 4907 31 * 31 * CrossHash.hash(vowelSplitters) + 4908 31 * CrossHash.hash(closingSyllables) ^ 4909 (clean ? 
1 : 0); 4910 result = 31 * 31 * 31 * 31 * result + 4911 31 * 31 * 31 * CrossHash.hash(syllableFrequencies) + 4912 31 * 31 * NumberTools.doubleToMixedIntBits(totalSyllableFrequency) + 4913 31 * NumberTools.doubleToMixedIntBits(vowelStartFrequency) + 4914 NumberTools.doubleToMixedIntBits(vowelEndFrequency) | 0; 4915 result = 31 * 31 * 31 * 31 * result + 4916 31 * 31 * 31 * (sanityChecks != null ? sanityChecks.length + 1 : 0) + 4917 31 * 31 * NumberTools.doubleToMixedIntBits(syllableEndFrequency) + 4918 31 * NumberTools.doubleToMixedIntBits(vowelSplitFrequency) | 0; 4919 if(modifiers != null) { 4920 for (int i = 0; i < modifiers.size(); i++) { 4921 result = result + 7 * (i + 1) * modifiers.get(i).hashCode() | 0; 4922 } 4923 } 4924 return result; 4925 } 4926 public long hash64() { 4927 long result = CrossHash.hash64(openingVowels); 4928 result = 31L * result + CrossHash.hash64(midVowels); 4929 result = 31L * result + CrossHash.hash64(openingConsonants); 4930 result = 31L * result + CrossHash.hash64(midConsonants); 4931 result = 31L * result + CrossHash.hash64(closingConsonants); 4932 result = 31L * result + CrossHash.hash64(vowelSplitters); 4933 result = 31L * result + CrossHash.hash64(closingSyllables); 4934 result = 31L * result + CrossHash.hash64(syllableFrequencies); 4935 result = 31L * result + (clean ? 1L : 0L); 4936 result = 31L * result + NumberTools.doubleToLongBits(totalSyllableFrequency); 4937 result = 31L * result + NumberTools.doubleToLongBits(vowelStartFrequency); 4938 result = 31L * result + NumberTools.doubleToLongBits(vowelEndFrequency); 4939 result = 31L * result + NumberTools.doubleToLongBits(vowelSplitFrequency); 4940 result = 31L * result + NumberTools.doubleToLongBits(syllableEndFrequency); 4941 result = 31L * result + (sanityChecks != null ? 
sanityChecks.length + 1L : 0L); 4942 result *= 31L; 4943 if(modifiers != null) { 4944 for (int i = 0; i < modifiers.size(); i++) { 4945 result += 7L * (i + 1L) * CrossHash.hash64(modifiers.get(i).alterations); 4946 } 4947 } 4948 return result; 4949 } 4950 4951 @Override 4952 public String toString() { 4953 return "FakeLanguageGen{" + 4954 "openingVowels=" + Arrays.toString(openingVowels) + 4955 ", midVowels=" + Arrays.toString(midVowels) + 4956 ", openingConsonants=" + Arrays.toString(openingConsonants) + 4957 ", midConsonants=" + Arrays.toString(midConsonants) + 4958 ", closingConsonants=" + Arrays.toString(closingConsonants) + 4959 ", vowelSplitters=" + Arrays.toString(vowelSplitters) + 4960 ", closingSyllables=" + Arrays.toString(closingSyllables) + 4961 ", clean=" + clean + 4962 ", syllableFrequencies=" + Arrays.toString(syllableFrequencies) + 4963 ", totalSyllableFrequency=" + totalSyllableFrequency + 4964 ", vowelStartFrequency=" + vowelStartFrequency + 4965 ", vowelEndFrequency=" + vowelEndFrequency + 4966 ", vowelSplitFrequency=" + vowelSplitFrequency + 4967 ", syllableEndFrequency=" + syllableEndFrequency + 4968 ", sanityChecks=" + Arrays.toString(sanityChecks) + 4969 ", modifiers=" + modifiers + 4970 '}'; 4971 } 4972 4973 public FakeLanguageGen copy() { 4974 return new FakeLanguageGen(openingVowels, midVowels, openingConsonants, midConsonants, 4975 closingConsonants, closingSyllables, vowelSplitters, syllableFrequencies, vowelStartFrequency, 4976 vowelEndFrequency, vowelSplitFrequency, syllableEndFrequency, sanityChecks, clean, modifiers) 4977 .summarize(summary).setName(name); 4978 } 4979 4980 4981 public String serializeToString() { 4982 return (summary == null) ? 
"" : summary; 4983 } 4984 4985 public static FakeLanguageGen deserializeFromString(String data) { 4986 if (data == null || data.equals("")) 4987 return ENGLISH.copy(); 4988 int poundIndex = data.indexOf('#'), snailIndex = data.indexOf('@'), tempBreak = data.indexOf('℗'), 4989 breakIndex = (tempBreak < 0) ? data.length() : tempBreak, 4990 tildeIndex = Math.min(data.indexOf('~'), breakIndex), prevTildeIndex = -1; 4991 if (tildeIndex < 0) 4992 tildeIndex = data.length(); 4993 4994 if (snailIndex < 0) 4995 return ENGLISH.copy(); 4996 ArrayList<Object> pairs = new ArrayList<>(4); 4997 while (snailIndex >= 0) { 4998 if (poundIndex >= 0 && poundIndex < snailIndex) // random case 4999 { 5000 pairs.add(randomLanguage(Long.parseLong(data.substring(poundIndex + 1, snailIndex)))); 5001 pairs.add(Double.valueOf(data.substring(snailIndex + 1, tildeIndex))); 5002 poundIndex = -1; 5003 } else { 5004 pairs.add(registry.getAt(Integer.parseInt(data.substring(prevTildeIndex + 1, snailIndex)))); 5005 pairs.add(Double.valueOf(data.substring(snailIndex + 1, tildeIndex))); 5006 } 5007 snailIndex = data.indexOf('@', snailIndex + 1); 5008 if (snailIndex > breakIndex) 5009 break; 5010 prevTildeIndex = tildeIndex; 5011 tildeIndex = Math.min(data.indexOf('~', tildeIndex + 1), breakIndex); 5012 if (tildeIndex < 0) 5013 tildeIndex = data.length(); 5014 } 5015 ArrayList<Modifier> mods = new ArrayList<>(8); 5016 if (breakIndex == tempBreak) { 5017 tildeIndex = breakIndex - 1; 5018 while ((prevTildeIndex = data.indexOf('℗', tildeIndex + 1)) >= 0) { 5019 tildeIndex = data.indexOf('℗', prevTildeIndex + 1); 5020 if (tildeIndex < 0) tildeIndex = data.length(); 5021 mods.add(Modifier.deserializeFromString(data.substring(prevTildeIndex, tildeIndex))); 5022 } 5023 } 5024 FakeLanguageGen flg = mixAll(pairs.toArray()); 5025 flg.modifiers.addAll(mods); 5026 return flg; 5027 } 5028 5029 public static class Modifier implements Serializable { 5030 private static final long serialVersionUID = 
1734863678490422371L; 5031 private transient static final StringBuilder modSB = new StringBuilder(32); 5032 public final Alteration[] alterations; 5033 5034 public Modifier() { 5035 alterations = new Alteration[0]; 5036 } 5037 5038 public Modifier(String pattern, String replacement) { 5039 alterations = new Alteration[]{new Alteration(pattern, replacement)}; 5040 } 5041 5042 public Modifier(String pattern, String replacement, double chance) { 5043 alterations = new Alteration[]{new Alteration(pattern, replacement, chance)}; 5044 } 5045 5046 public Modifier(Alteration... alts) { 5047 alterations = (alts == null) ? new Alteration[0] : alts; 5048 } 5049 5050 public StringBuilder modify(IRNG rng, StringBuilder sb) { 5051 Matcher m; 5052 Replacer.StringBuilderBuffer tb; 5053 boolean found; 5054 Alteration alt; 5055 for (int a = 0; a < alterations.length; a++) { 5056 alt = alterations[a]; 5057 modSB.setLength(0); 5058 tb = Replacer.wrap(modSB); 5059 m = alt.replacer.getPattern().matcher(sb); 5060 5061 found = false; 5062 while (true) { 5063 if (alt.chance >= 1 || rng.nextDouble() < alt.chance) { 5064 if (!Replacer.replaceStep(m, alt.replacer.getSubstitution(), tb)) 5065 break; 5066 found = true; 5067 } else { 5068 if (!m.find()) 5069 break; 5070 found = true; 5071 m.getGroup(MatchResult.PREFIX, tb); 5072 m.getGroup(MatchResult.MATCH, tb); 5073 m.setTarget(m, MatchResult.SUFFIX); 5074 } 5075 } 5076 if (found) { 5077 m.getGroup(MatchResult.TARGET, tb); 5078 sb.setLength(0); 5079 sb.append(modSB); 5080 } 5081 } 5082 return sb; 5083 } 5084 5085 /** 5086 * For a character who always pronounces 's', 'ss', and 'sh' as 'th'. 5087 */ 5088 public static final Modifier LISP = new Modifier("[tţťț]?[sśŝşšș]+h?", "th"); 5089 5090 /** 5091 * For a character who always lengthens 's' and 'z' sounds not starting a word. 
5092 */ 5093 public static final Modifier HISS = new Modifier("(.)([sśŝşšșzźżž])+", "$1$2$2$2"); 5094 5095 /** 5096 * For a character who has a 20% chance to repeat a starting consonant or vowel. 5097 */ 5098 public static final Modifier STUTTER = new Modifier( 5099 new Alteration("^([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу]+)", "$1-$1", 0.2), 5100 new Alteration("^([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųαοειυωаеёийъыэюяоу]+)", "$1-$1", 0.2)); 5101 5102 /** 5103 * For a language that has a 40% chance to repeat a single Latin vowel (a, e, o, or a variant on one of them 5104 * like å or ö, but not merged letters like æ and œ). 5105 */ 5106 public static final Modifier DOUBLE_VOWELS = new Modifier( 5107 "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳ]|^)" 5108 + "([aàáâãäåāăąǻeèéêëēĕėęěòóôõöøōŏőǿ])" 5109 + "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳ]|$)", "$1$2$2$3", 0.4); 5110 5111 5112 /** 5113 * For a language that has a 50% chance to repeat a single consonant. 5114 */ 5115 public static final Modifier DOUBLE_CONSONANTS = new Modifier("([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу])" + 5116 "([^aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоуqwhjx])" + 5117 "([aàáâãäåæāăąǻǽeèéêëēĕėęěiìíîïĩīĭįıoòóôõöøōŏőœǿuùúûüũūŭůűųyýÿŷỳαοειυωаеёийъыэюяоу]|$)", "$1$2$2$3", 0.5); 5118 5119 /** 5120 * For a language that never repeats the same letter twice in a row. 5121 */ 5122 public static final Modifier NO_DOUBLES = new Modifier("(.)\\1", "$1"); 5123 5124 /** 5125 * Removes accented letters and the two non-English consonants from text generated with {@link #NORSE}. 5126 * Replaces á, é, í, ý, ó, æ, ú, and ö with a, e, i, y, o, ae, and ou. In some instances, replaces j 5127 * with y. 
Replaces ð and þ with th and th, except for when preceded by s (then it replaces sð or sþ 5128 * with st or st) or when the start of a word is fð or fþ, where it replaces with fr or fr. 5129 */ 5130 public static final Modifier SIMPLIFY_NORSE = replacementTable( 5131 "á", "a", 5132 "é", "e", 5133 "í", "i", 5134 "ý", "y", 5135 "ó", "o", 5136 "ú", "u", 5137 "æ", "ae", 5138 "ö", "ou", 5139 "([^aeiou])jy", "$1yai", 5140 "([^aeiou])j(?:[aeiouy]+)", "$1yo", 5141 "s([ðþ])", "st", 5142 "\\bf[ðþ]", "fr", 5143 "[ðþ]", "th"); 5144 5145 /** 5146 * Simple changes to merge "ae" into "æ", "oe" into "œ", and any of "aé", "áe", or "áé" into "ǽ". 5147 */ 5148 public static final Modifier LIGATURES = replacementTable("ae", "æ", "oe", "œ", "áe", "ǽ", "aé", "ǽ", "áé", "ǽ"); 5149 /** 5150 * Some changes that can be applied when sanity checks (which force re-generating a new word) aren't appropriate 5151 * for fixing a word that isn't pronounceable. 5152 */ 5153 public static final Modifier GENERAL_CLEANUP = replacementTable( 5154 "[æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy]([æǽœýÿŷỳy])", "$1", 5155 "q([ùúûüũūŭůűųu])$", "q$1e", 5156 "([ìíîïĩīĭįıi])[ìíîïĩīĭįıi]", "$1", 5157 "([æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy])[wŵẁẃẅ]$", "$1", 5158 "([ùúûüũūŭůűųu])([òóôõöøōŏőǿo])", "$2$1", 5159 "[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]([æǽœ])", "$1", 5160 "([æǽœ])[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]", "$1", 5161 "([wŵẁẃẅ])[wŵẁẃẅ]", "$1", 5162 "qq", "q"); 5163 5164 //àáâãäåāăąǻæǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳy 5165 //bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżž 5166 5167 /** 5168 * Creates a Modifier that will replace the nth char in initial with the nth char in change. Expects initial and 5169 * change to be the same length, but will use the lesser length if they are not equal-length. Because of the 5170 * state of the text at the time modifiers are run, only lower-case letters need to be searched for. 
5171 * 5172 * @param initial a String containing lower-case letters or other symbols to be swapped out of a text 5173 * @param change a String containing characters that will replace occurrences of characters in initial 5174 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5175 */ 5176 public static Modifier charReplacementTable(String initial, String change) { 5177 Alteration[] alts = new Alteration[Math.min(initial.length(), change.length())]; 5178 for (int i = 0; i < alts.length; i++) { 5179 //literal string syntax; avoids sensitive escaping issues and also doesn't need a character class, 5180 // which is slightly slower and has some odd escaping cases. 5181 alts[i] = new Alteration("\\Q" + initial.charAt(i), change.substring(i, i + 1)); 5182 } 5183 return new Modifier(alts); 5184 } 5185 5186 /** 5187 * Creates a Modifier that will replace the nth String key in map with the nth value. Because of the 5188 * state of the text at the time modifiers are run, only lower-case letters need to be searched for. 5189 * This overload of replacementTable allows full regex pattern strings as keys and replacement syntax, 5190 * such as searching for "([aeiou])\\1+" to find repeated occurrences of the same vowel, and "$1" in 5191 * this example to replace the repeated section with only the first vowel. 5192 * The ordering of map matters if a later key contains an earlier key (the earlier one will be replaced 5193 * first, possibly making the later key not match), or if an earlier replacement causes a later one to 5194 * become valid. 
5195 * 5196 * @param map containing String keys to replace and String values to use instead; replacements happen in order 5197 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5198 */ 5199 public static Modifier replacementTable(OrderedMap<String, String> map) { 5200 if (map == null) 5201 return new Modifier(); 5202 Alteration[] alts = new Alteration[map.size()]; 5203 for (int i = 0; i < alts.length; i++) { 5204 alts[i] = new Alteration(map.keyAt(i), map.getAt(i)); 5205 } 5206 return new Modifier(alts); 5207 } 5208 5209 /** 5210 * Creates a Modifier that will replace the (n*2)th String in pairs with the (n*2+1)th value in pairs. Because 5211 * of the state of the text at the time modifiers are run, only lower-case letters need to be searched for. 5212 * This overload of replacementTable allows full regex syntax for search and replacement Strings, 5213 * such as searching for "([aeiou])\\1+" to find repeated occurrences of the same vowel, and "$1" in 5214 * this example to replace the repeated section with only the first vowel. 5215 * The ordering of pairs matters if a later search contains an earlier search (the earlier one will be replaced 5216 * first, possibly making the later search not match), or if an earlier replacement causes a later one to 5217 * become valid. 5218 * 5219 * @param pairs array or vararg of alternating Strings to search for and Strings to replace with; replacements happen in order 5220 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5221 */ 5222 public static Modifier replacementTable(String... 
pairs) { 5223 int len; 5224 if (pairs == null || (len = pairs.length) <= 1) 5225 return new Modifier(); 5226 Alteration[] alts = new Alteration[len >> 1]; 5227 for (int i = 0; i < alts.length; i++) { 5228 alts[i] = new Alteration(pairs[i<< 1], pairs[i<<1|1]); 5229 } 5230 return new Modifier(alts); 5231 } 5232 5233 /** 5234 * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the 5235 * language already uses; insertion will replace an existing vowel (at any point in a word that had a vowel 5236 * generated) with a probability of {@code chance}, so chance should be low (0.1 at most) unless you want the 5237 * newly-inserted vowel to be likely to be present in every word of some sentences. 5238 * @param insertion the String to use as an additional vowel 5239 * @param chance the chance for a vowel cluster to be replaced with insertion; normally 0.1 or less 5240 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5241 */ 5242 public static Modifier insertVowel(String insertion, double chance) 5243 { 5244 return new Modifier(anyVowelCluster, insertion, chance); 5245 } 5246 5247 /** 5248 * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants 5249 * that the language already uses; insertion will replace an existing consonant (at any point in a word that had 5250 * a consonant generated) with a probability of {@code chance}, so chance should be low (0.1 at most) unless you 5251 * want the newly-inserted consonant to be likely to be present in every word of some sentences. 
5252 * @param insertion the String to use as an additional consonant 5253 * @param chance the chance for a consonant cluster to be replaced with insertion; normally 0.1 or less 5254 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5255 */ 5256 public static Modifier insertConsonant(String insertion, double chance) 5257 { 5258 return new Modifier(anyConsonantCluster, insertion, chance); 5259 } 5260 5261 /** 5262 * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the 5263 * language already uses; insertion will replace an existing vowel at the start of a word with a probability of 5264 * {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted vowel to be likely 5265 * to start every word of some sentences. Not all languages can start words with vowels, or do that very rarely, 5266 * so this might not do anything. 5267 * @param insertion the String to use as an additional opening vowel 5268 * @param chance the chance for a vowel cluster at the start of a word to be replaced with insertion; normally 0.2 or less 5269 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5270 */ 5271 public static Modifier insertOpeningVowel(String insertion, double chance) 5272 { 5273 return new Modifier("\\b[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]+", insertion, chance); 5274 } 5275 5276 /** 5277 * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants 5278 * that the language already uses; insertion will replace an existing consonant at the start of a word with a 5279 * probability of {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted 5280 * consonant to be likely to start every word of some sentences. 
Not all languages can start words with 5281 * consonants, or do that very rarely, so this might not do anything. 5282 * @param insertion the String to use as an additional opening consonant 5283 * @param chance the chance for a consonant cluster at the start of a word to be replaced with insertion; normally 0.2 or less 5284 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5285 */ 5286 public static Modifier insertOpeningConsonant(String insertion, double chance) 5287 { 5288 return new Modifier("\\b[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]+", insertion, chance); 5289 } 5290 5291 /** 5292 * Adds the potential for the String {@code insertion} to be used as a vowel in addition to the vowels that the 5293 * language already uses; insertion will replace an existing vowel at the end of a word with a probability of 5294 * {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted vowel to be likely 5295 * to end every word of some sentences. Not all languages can end words with vowels, or do that very 5296 * rarely, so this might not do anything. 
5297 * @param insertion the String to use as an additional closing vowel 5298 * @param chance the chance for a vowel cluster at the end of a word to be replaced with insertion; normally 0.2 or less 5299 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5300 */ 5301 public static Modifier insertClosingVowel(String insertion, double chance) 5302 { 5303 return new Modifier("[àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυωаеёийоуъыэюя]+\\b", insertion, chance); 5304 } 5305 5306 /** 5307 * Adds the potential for the String {@code insertion} to be used as a consonant in addition to the consonants 5308 * that the language already uses; insertion will replace an existing consonant at the end of a word with a 5309 * probability of {@code chance}, so chance should be low (0.2 at most) unless you want the newly-inserted 5310 * consonant to be likely to end every word of some sentences. Not all languages can end words with consonants, 5311 * or do that very rarely, so this might not do anything. 5312 * @param insertion the String to use as an additional closing consonant 5313 * @param chance the chance for a consonant cluster at the end of a word to be replaced with insertion; normally 0.2 or less 5314 * @return a Modifier that can be added to a FakeLanguageGen with its addModifiers() method 5315 */ 5316 public static Modifier insertClosingConsonant(String insertion, double chance) 5317 { 5318 return new Modifier("[bcçćĉċčdþðďđfgĝğġģhĥħjĵȷkķlĺļľŀłmnñńņňŋpqrŕŗřsśŝşšștţťțvwŵẁẃẅxyýÿŷỳzźżžρσζτκχνθμπψβλγφξςбвгдклпрстфхцжмнзчшщ]+\\b", insertion, chance); 5319 } 5320 5321 /** 5322 * Replaces any characters this can produce that aren't in ASCII or Latin-1 with Latin-script stand-ins; this 5323 * will often use accented characters, but will only use those present in Latin-1 (which many fonts support). 
5324 * <br> 5325 * The rationale for this Modifier is to allow users of FakeLanguageGen who don't display with the wide-ranging 5326 * fonts in the display module to still be able to display something reasonable for generated text. 5327 */ 5328 public static final Modifier REDUCE_ACCENTS = replacementTable("ā", "â", "ă", "ä", "ą", "ã", "ǻ", "å", "ǽ", "áe", 5329 "ē", "ê", "ĕ", "ë", "ė", "ë", "ę", "è", "ě", "é", "ĩ", "í", "ī", "î", "į", "ì", "ĭ", "ï", "ı", "iy", "ō", "ô", 5330 "ŏ", "ö", "ő", "ó", "œ", "oe", "ǿ", "ø", "ũ", "ú", "ŭ", "ü", "ů", "ùo", "ű", "ú", "ų", "ù", "ŷ", "ý", "ỳ", "ÿ", 5331 // done with latin vowels... 5332 "ć", "ç", "ĉ", "ç", "ċ", "ç", "č", "ç", "ď", "dh", "đ", "dh", "ĝ", "gh", "ğ", "gh", "ġ", "gh", "ģ", "gh", 5333 "ĥ", "hh", "ħ", "hh", "ĵ", "jh", "ȷ", "jj", "ķ", "kc", "ĺ", "lh", "ļ", "ll", "ľ", "ly", "ŀł", "yl", "ł", "wl", 5334 "ń", "nn", "ņ", "wn", "ň", "nh", "ŋ", "ng", "ŕ", "rh", "ŗ", "wr", "ř", "rr", "ś", "ss", "ŝ", "hs", 5335 "ş", "sy", "š", "ws", "ș", "sw", "ţ", "wt", "ť", "tt", "ț", "ty", "ŵ", "ww", "ẁ", "hw", "ẃ", "wh", "ẅ", "uw", 5336 "ź", "hz", "ż", "zy", "ž", "zz", 5337 // greek 5338 "α", "a", "ο", "o", "ε", "e", "ι", "i", "υ", "y", "ω", "au", 5339 "κρ", "kr", "γγ", "ng", "γκ", "nk", "γξ", "nx", "γχ", "nch", "ρστ", "rst", "ρτ", "rt", 5340 "ρ", "rh", "σ", "s", "ζ", "z", "τ", "t", "κ", "k", "χ", "ch", "ν", "n", "ξ", "x", 5341 "θ", "th", "μ", "m", "π", "p", "ψ", "ps", "β", "b", "λ", "l", "γ", "g", "δ", "d", "φ", "ph", "ς", "s", 5342 // cyrillic 5343 "а", "a", "е", "e", "ё", "ë", "и", "i", "й", "î", "о", "o", "у", "u", "ъ", "ie", "ы", "y", "э", "e", "ю", "iu", "я", "ia", 5344 "б", "b", "в", "v", "г", "g", "д", "d", "к", "k", "л", "l", "п", "p", "р", "r", "с", "s", "т", "t", 5345 "ф", "f", "х", "kh", "ц", "ts", "ч", "ch", "ж", "zh", "м", "m", "н", "n", "з", "z", "ш", "sh", "щ", "shch"); 5346 5347 @Override 5348 public boolean equals(Object o) { 5349 if (this == o) return true; 5350 if (o == null || getClass() != o.getClass()) return false; 
5351 5352 Modifier modifier = (Modifier) o; 5353 5354 // Probably incorrect - comparing Object[] arrays with Arrays.equals 5355 return Arrays.equals(alterations, modifier.alterations); 5356 } 5357 5358 @Override 5359 public int hashCode() { 5360 return CrossHash.hash(alterations); 5361 } 5362 5363 @Override 5364 public String toString() { 5365 return "Modifier{" + 5366 "alterations=" + Arrays.toString(alterations) + 5367 '}'; 5368 } 5369 5370 public String serializeToString() { 5371 if (alterations.length == 0) return "\6"; 5372 modSB.setLength(0); 5373 modSB.append('\6'); 5374 for (int i = 0; i < alterations.length; i++) 5375 modSB.append(alterations[i].serializeToString()).append('\6'); 5376 return modSB.toString(); 5377 } 5378 5379 public static Modifier deserializeFromString(String data) { 5380 int currIdx = data.indexOf(6), altIdx = currIdx, matches = 0; 5381 while (currIdx >= 0) { 5382 if ((currIdx = data.indexOf(6, currIdx + 1)) < 0) 5383 break; 5384 matches++; 5385 } 5386 Alteration[] alts = new Alteration[matches]; 5387 for (int i = 0; i < matches; i++) { 5388 alts[i] = Alteration.deserializeFromString(data.substring(altIdx + 1, altIdx = data.indexOf(6, altIdx + 1))); 5389 } 5390 return new Modifier(alts); 5391 } 5392 } 5393 5394 public static class Alteration implements Serializable { 5395 private static final long serialVersionUID = -2138854697837563188L; 5396 public Replacer replacer; 5397 public String replacement; 5398 public double chance; 5399 5400 public Alteration() { 5401 this("[tţťț]?[sśŝşšș]+h?", "th"); 5402 } 5403 5404 public Alteration(String pattern, String replacement) { 5405 this.replacement = replacement; 5406 replacer = Pattern.compile(pattern).replacer(replacement); 5407 chance = 1.0; 5408 } 5409 5410 public Alteration(String pattern, String replacement, double chance) { 5411 this.replacement = replacement; 5412 replacer = Pattern.compile(pattern).replacer(replacement); 5413 this.chance = chance; 5414 } 5415 5416 public 
Alteration(Pattern pattern, String replacement, double chance) { 5417 this.replacement = replacement; 5418 replacer = pattern.replacer(replacement); 5419 this.chance = chance; 5420 } 5421 5422 @Override 5423 public boolean equals(Object o) { 5424 if (this == o) return true; 5425 if (o == null || getClass() != o.getClass()) return false; 5426 5427 Alteration that = (Alteration) o; 5428 5429 if (Double.compare(that.chance, chance) != 0) return false; 5430 return replacer.equals(that.replacer); 5431 5432 } 5433 5434 @Override 5435 public int hashCode() { 5436 long result; 5437 result = CrossHash.hash64(replacer.getPattern().serializeToString()); 5438 result = 31L * result + NumberTools.doubleToLongBits(chance); 5439 result ^= result >>> 32; 5440 return (int) (0xFFFFFFFFL & result); 5441 } 5442 5443 @Override 5444 public String toString() { 5445 return "Alteration{" + 5446 "replacer=" + replacer + 5447 ", chance=" + chance + 5448 '}'; 5449 } 5450 5451 public String serializeToString() { 5452 return replacer.getPattern().serializeToString() + '\2' + replacement + '\4' + chance; 5453 } 5454 5455 public static Alteration deserializeFromString(String data) { 5456 int split2 = data.indexOf('\2'), split4 = data.indexOf('\4'); 5457 return new Alteration(Pattern.deserializeFromString(data.substring(0, split2)), 5458 data.substring(split2 + 1, split4), 5459 Double.parseDouble(data.substring(split4 + 1))); 5460 } 5461 } 5462 5463 /** 5464 * A simple way to bundle a FakeLanguageGen with the arguments that would be passed to it when calling 5465 * {@link FakeLanguageGen#sentence(IRNG, int, int, String[], String[], double, int)} or one of its overloads. 5466 * You can call {@link #sentence()} on this to produce another String sentence with the parameters it was given 5467 * at construction. 
The parameters to
     * {@link #SentenceForm(FakeLanguageGen, IStatefulRNG, int, int, String[], String[], double, int)} are stored in fields of
     * the same name, and all fields in this class are public and modifiable.
     */
    public static class SentenceForm implements Serializable
    {
        private static final long serialVersionUID = 1246527948419533147L;
        public IStatefulRNG rng;
        public int minWords, maxWords, maxChars;
        public String[] midPunctuation, endPunctuation;
        public double midPunctuationFrequency;
        public FakeLanguageGen language;

        /**
         * Builds a SentenceForm with all default fields: {@link FakeLanguageGen#FANTASY_NAME} as the language,
         * between 1 and 9 words per sentence, and otherwise the same defaults as
         * {@link #SentenceForm(FakeLanguageGen, int, int)}.
         */
        public SentenceForm()
        {
            this(FakeLanguageGen.FANTASY_NAME, FakeLanguageGen.srng,
                    1, 9,
                    mid, end,
                    0.18, -1);
        }

        /**
         * Builds a SentenceForm with only a few fields specified. The {@link #rng} is made from the state of
         * FakeLanguageGen's static {@link FakeLanguageGen#srng} field, maxChars is -1 so the sentence length is
         * limited only by maxWords and the length of the words produced, the punctuation arrays are the
         * {@code mid} and {@code end} arrays this class has in scope, and {@code midPunctuationFrequency} is 0.18.
         * @param language A FakeLanguageGen to use to generate words
         * @param minWords minimum words per sentence
         * @param maxWords maximum words per sentence
         */
        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords)
        {
            this(language, FakeLanguageGen.srng,
                    minWords, maxWords,
                    mid, end,
                    0.18, -1);
        }

        /**
         * Builds a SentenceForm with all fields specified except for {@link #rng}, which is made from the state
         * of FakeLanguageGen's static {@link FakeLanguageGen#srng} field, and maxChars, which is set to -1 so the
         * sentence length is limited only by maxWords and the length of the words produced.
         * @param language A FakeLanguageGen to use to generate words
         * @param minWords minimum words per sentence
         * @param maxWords maximum words per sentence
         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
         */
        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords, String[] midPunctuation,
                            String[] endPunctuation, double midPunctuationFrequency)
        {
            this(language, FakeLanguageGen.srng,
                    minWords, maxWords,
                    midPunctuation, endPunctuation,
                    midPunctuationFrequency, -1);
        }

        /**
         * Builds a SentenceForm with all fields specified except for {@link #rng}, which is made from the state
         * of FakeLanguageGen's static {@link FakeLanguageGen#srng} field.
         * @param language A FakeLanguageGen to use to generate words
         * @param minWords minimum words per sentence
         * @param maxWords maximum words per sentence
         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
         * @param maxChars the maximum number of chars to use in a sentence, or -1 for no hard limit
         */
        public SentenceForm(FakeLanguageGen language, int minWords, int maxWords, String[] midPunctuation,
                            String[] endPunctuation, double midPunctuationFrequency, int maxChars)
        {
            this(language, FakeLanguageGen.srng,
                    minWords, maxWords,
                    midPunctuation, endPunctuation,
                    midPunctuationFrequency, maxChars);
        }

        /**
         * Builds a SentenceForm with all fields specified; each value is stored by reference except for
         * {@code rng}, which is neither stored nor advanced: a new GWTRNG is created from its current state.
         * @param language A FakeLanguageGen to use to generate words
         * @param rng an IStatefulRNG that will not be directly referenced; its state is copied into a new GWTRNG
         * @param minWords minimum words per sentence
         * @param maxWords maximum words per sentence
         * @param midPunctuation an array of Strings that can be used immediately after words in the middle of sentences, like "," or ";"
         * @param endPunctuation an array of Strings that can end a sentence, like ".", "?", or "..."
         * @param midPunctuationFrequency the probability that two words will be separated by a String from midPunctuation, between 0.0 and 1.0
         * @param maxChars the maximum number of chars to use in a sentence, or -1 for no hard limit
         */
        public SentenceForm(FakeLanguageGen language, IStatefulRNG rng, int minWords, int maxWords,
                            String[] midPunctuation, String[] endPunctuation,
                            double midPunctuationFrequency, int maxChars)
        {
            // copy the generator's state so this form never shares a stream with the caller's rng
            this.rng = new GWTRNG(rng.getState());
            this.language = language;
            this.minWords = minWords;
            this.maxWords = maxWords;
            this.maxChars = maxChars;
            this.midPunctuation = midPunctuation;
            this.endPunctuation = endPunctuation;
            this.midPunctuationFrequency = midPunctuationFrequency;
        }

        /**
         * Generates a sentence by passing this form's current field values to
         * {@link FakeLanguageGen#sentence(IRNG, int, int, String[], String[], double, int)} on {@link #language}.
         * @return the generated sentence
         */
        public String sentence()
        {
            return language.sentence(
                    rng,
                    minWords, maxWords,
                    midPunctuation, endPunctuation,
                    midPunctuationFrequency,
                    maxChars);
        }

        /**
         * Serializes this form as eight sections separated by '℘': the serialized language, the rng state,
         * minWords, maxWords, midPunctuation joined with "ℙ", endPunctuation joined with "ℙ", the long bits of
         * midPunctuationFrequency, and maxChars.
         * @return a String that {@link #deserializeFromString(String)} can read
         */
        public String serializeToString() {
            final StringBuilder out = new StringBuilder();
            out.append(language.serializeToString()).append('℘');
            out.append(rng.getState()).append('℘');
            out.append(minWords).append('℘');
            out.append(maxWords).append('℘');
            out.append(StringKit.join("ℙ", midPunctuation)).append('℘');
            out.append(StringKit.join("ℙ", endPunctuation)).append('℘');
            out.append(NumberTools.doubleToLongBits(midPunctuationFrequency)).append('℘');
            out.append(maxChars);
            return out.toString();
        }

        /**
         * Rebuilds a SentenceForm from the format written by {@link #serializeToString()}, reading the
         * '℘'-separated sections from left to right.
         * @param ser a String in the format described above; must not be null
         * @return a new SentenceForm with the deserialized values
         */
        public static SentenceForm deserializeFromString(String ser)
        {
            // 'from' is the first index of the current section, 'sep' the index of the separator ending it
            int sep = ser.indexOf('℘');
            final FakeLanguageGen lang = FakeLanguageGen.deserializeFromString(ser.substring(0, sep));

            int from = sep + 1;
            sep = ser.indexOf('℘', from);
            final GWTRNG rng = new GWTRNG(StringKit.longFromDec(ser, from, sep));

            from = sep + 1;
            sep = ser.indexOf('℘', from);
            final int minWords = StringKit.intFromDec(ser, from, sep);

            from = sep + 1;
            sep = ser.indexOf('℘', from);
            final int maxWords = StringKit.intFromDec(ser, from, sep);

            from = sep + 1;
            sep = ser.indexOf('℘', from);
            final String[] midPunctuation = StringKit.split(ser.substring(from, sep), "ℙ");

            from = sep + 1;
            sep = ser.indexOf('℘', from);
            final String[] endPunctuation = StringKit.split(ser.substring(from, sep), "ℙ");

            from = sep + 1;
            sep = ser.indexOf('℘', from);
            final double midFreq = NumberTools.longBitsToDouble(StringKit.longFromDec(ser, from, sep));

            // the last section runs to the end of the String
            final int maxChars = StringKit.intFromDec(ser, sep + 1, ser.length());

            return new SentenceForm(lang, rng, minWords, maxWords, midPunctuation, endPunctuation, midFreq, maxChars);
        }
    }
}