001package squidpony; 002 003import regexodus.Category; 004import squidpony.squidmath.GWTRNG; 005import squidpony.squidmath.IStatefulRNG; 006import squidpony.squidmath.ProbabilityTable; 007 008import java.util.ArrayList; 009import java.util.HashMap; 010import java.util.Set; 011 012/** 013 * Based on work by Nolithius available at the following two sites 014 * https://github.com/Nolithius/weighted-letter-namegen 015 * http://code.google.com/p/weighted-letter-namegen/ 016 * 017 * @see FakeLanguageGen FakeLanguageGen is meant for generating more than just names, and can imitate language styles. 018 * @author Eben Howard - http://squidpony.com - howard@squidpony.com 019 */ 020public class WeightedLetterNamegen { 021//<editor-fold defaultstate="collapsed" desc="Viking Style static name list"> 022 023 public static final String[] VIKING_STYLE_NAMES = new String[]{ 024 "Andor", 025 "Baatar", 026 "Beowulf", 027 "Drogo", 028 "Freya", 029 "Grog", 030 "Gruumsh", 031 "Grunt", 032 "Hodor", 033 "Hrothgar", 034 "Hrun", 035 "Korg", 036 "Lothar", 037 "Odin", 038 "Theodrin", 039 "Thor", 040 "Yngvar", 041 "Xandor" 042 }; 043 //</editor-fold> 044//<editor-fold defaultstate="collapsed" desc="Star Wars Style static name list"> 045 public static final String[] STAR_WARS_STYLE_NAMES = new String[]{ 046 "Lutoif Vap", 047 "Nasoi Seert", 048 "Jitpai", 049 "Sose", 050 "Vainau", 051 "Jairkau", 052 "Tirka Kist", 053 "Boush", 054 "Wofe", 055 "Voxin Voges", 056 "Koux Boiti", 057 "Loim", 058 "Gaungu", 059 "Mut Tep", 060 "Foimo Saispi", 061 "Toneeg Vaiba", 062 "Nix Nast", 063 "Gup Dangisp", 064 "Distark Toonausp", 065 "Tex Brinki", 066 "Kat Tosha", 067 "Tauna Foip", 068 "Frip Cex", 069 "Fexa Lun", 070 "Tafa", 071 "Zeesheerk", 072 "Cremoim Kixoop", 073 "Tago", 074 "Kesha Diplo" 075 }; 076 //</editor-fold> 077//<editor-fold defaultstate="collapsed" desc="USA male names static name list"> 078 public static final String[] COMMON_USA_MALE_NAMES = new String[]{ 079 "James", 080 "John", 081 "Robert", 082 "Michael", 083 "William", 084 "David", 085 "Richard", 086 "Charles", 087 "Joseph", 088 "Tomas", 089 "Christopher", 090 "Daniel", 091 "Paul", 092 "Mark", 093 "Donald", 094 "George", 095 "Kenneth", 096 "Steven", 097 "Edward", 098 "Brian", 099 "Ronald", 100 "Anthony", 101 "Kevin", 102 "Jason", 103 "Matthew", 104 "Gary", 105 "Timothy", 106 "Jose", 107 "Larry", 108 "Jeffrey", 109 "Frank", 110 "Scott", 111 "Eric", 112 "Stephen", 113 "Andrew", 114 "Raymond", 115 "Gregory", 116 "Joshua", 117 "Jerry", 118 "Dennis", 119 "Walter", 120 "Patrick", 121 "Peter", 122 "Harold", 123 "Douglas", 124 "Henry", 125 "Carl", 126 "Arthur", 127 "Ryan", 128 "Roger" 129 }; 130 //</editor-fold> 131//<editor-fold defaultstate="collapsed" desc="USA female names static name list"> 132 public static final String[] COMMON_USA_FEMALE_NAMES = new String[]{ 133 "Mary", 134 "Patricia", 135 "Linda", 136 "Barbara", 137 "Elizabeth", 138 "Jennifer", 139 "Maria", 140 "Susan", 141 "Margaret", 142 "Dorothy", 143 "Lisa", 144 "Nancy", 145 "Karen", 146 "Betty", 147 "Helen", 148 "Sandra", 149 "Donna", 150 "Carol", 151 "Ruth", 152 "Sharon", 153 "Michelle", 154 "Laura", 155 "Sarah", 156 "Kimberly", 157 "Deborah", 158 "Jessica", 159 "Shirley", 160 "Cynthia", 161 "Angela", 162 "Melissa", 163 "Brenda", 164 "Amy", 165 "Anna", 166 "Crystal", 167 "Virginia", 168 "Kathleen", 169 "Pamela", 170 "Martha", 171 "Becky", 172 "Amanda", 173 "Stephanie", 174 "Carolyn", 175 "Christine", 176 "Marie", 177 "Janet", 178 "Catherine", 179 "Frances", 180 "Ann", 181 "Joyce", 182 "Diane", 183 "Jane", 184 "Shauna", 185 "Trisha", 186 "Eileen", 187 "Danielle", 188 "Jacquelyn", 189 "Lynn", 190 "Hannah", 191 "Brittany" 192 }; 193 //</editor-fold> 194//<editor-fold defaultstate="collapsed" desc="USA last names static name list"> 195 public static final String[] COMMON_USA_LAST_NAMES = new String[]{ 196 "Smith", 197 "Johnson", 198 "Williams", 199 "Brown", 200 "Jones", 201 "Miller", 202 "Davis", 203 "Wilson", 204 "Anderson", 205 "Taylor", 206 "Thomas", 207 "Moore", 208 "Martin", 209 "Jackson", 210 "Thompson", 211 "White", 212 "Clark", 213 "Lewis", 214 "Robinson", 215 "Walker", 216 "Willis", 217 "Carter", 218 "King", 219 "Lee", 220 "Grant", 221 "Howard", 222 "Morris", 223 "Bartlett", 224 "Paine", 225 "Wayne", 226 "Lorraine" 227 }; 228//</editor-fold> 229 230 //<editor-fold defaultstate="collapsed" desc="Lovecraft Mythos style static name list"> 231 public static final String[] LOVECRAFT_MYTHOS_NAMES = new String[]{ 232 "Koth", 233 "Ghlatelilt", 234 "Siarlut", 235 "Nyogongogg", 236 "Nyialan", 237 "Nyithiark", 238 "Lyun", 239 "Kethoshigr", 240 "Shobik", 241 "Tekogr", 242 "Hru-yn", 243 "Lya-ehibos", 244 "Hruna-oma-ult", 245 "Shabo'en", 246 "Shrashangal", 247 "Shukhaniark", 248 "Thaghum", 249 "Shrilang", 250 "Lukhungu'ith", 251 "Nyun", 252 "Nyia-ongin", 253 "Shogia-usun", 254 "Lyu-yl", 255 "Liathiagragr", 256 "Lyathagg", 257 "Hri'osurkut", 258 "Shothegh", 259 "No-orleshigh", 260 "Zvriangekh", 261 "Nyesashiv", 262 "Lyarkio", 263 "Le'akh", 264 "Liashi-en", 265 "Shurkano'um", 266 "Hrakhanoth", 267 "Ghlotsuban", 268 "Cthitughias", 269 "Ftanugh" 270 }; 271//</editor-fold> 272 273 private static final char[] vowels = {'a', 'e', 'i', 'o'};//not using y because it looks strange as a vowel in names 274 private static final int LAST_LETTER_CANDIDATES_MAX = 52; 275 276 private IStatefulRNG rng; 277 private String[] names; 278 private int consonantLimit; 279 private ArrayList<Integer> sizes; 280 281 private HashMap<Character, HashMap<Character, ProbabilityTable<Character>>> letters; 282 private ArrayList<Character> firstLetterSamples; 283 private ArrayList<Character> lastLetterSamples; 284 private DamerauLevenshteinAlgorithm dla = new DamerauLevenshteinAlgorithm(1, 1, 1, 1); 285 286 /** 287 * Creates the generator by seeding the provided list of names. 288 * 289 * @param names an array of Strings that are typical names to be emulated 290 */ 291 public WeightedLetterNamegen(String[] names) { 292 this(names, 2); 293 } 294 295 /** 296 * Creates the generator by seeding the provided list of names. 297 * 298 * @param names an array of Strings that are typical names to be emulated 299 * @param consonantLimit the maximum allowed consonants in a row 300 */ 301 public WeightedLetterNamegen(String[] names, int consonantLimit) { 302 this(names, consonantLimit, new GWTRNG()); 303 } 304 305 /** 306 * Creates the generator by seeding the provided list of names. The given RNG will be used for 307 * all random decisions this has to make, so if it has the same state (and RandomnessSource) on 308 * different runs through the program, it will produce the same names reliably. 309 * 310 * @param names an array of Strings that are typical names to be emulated 311 * @param consonantLimit the maximum allowed consonants in a row 312 * @param rng the source of randomness to be used 313 */ 314 public WeightedLetterNamegen(String[] names, int consonantLimit, IStatefulRNG rng) { 315 this.names = names; 316 this.consonantLimit = consonantLimit; 317 this.rng = rng; 318 init(); 319 } 320 321 /** 322 * Initialization, statistically measures letter likelihood. 323 */ 324 private void init() { 325 sizes = new ArrayList<>(); 326 letters = new HashMap<>(); 327 firstLetterSamples = new ArrayList<>(); 328 lastLetterSamples = new ArrayList<>(); 329 330 for (int i = 0; i < names.length - 1; i++) { 331 String name = names[i]; 332 if (name == null || name.length() < 1) { 333 continue; 334 } 335 336 // (1) Insert size 337 sizes.add(name.length()); 338 339 // (2) Grab first letter 340 firstLetterSamples.add(name.charAt(0)); 341 342 // (3) Grab last letter 343 lastLetterSamples.add(name.charAt(name.length() - 1)); 344 345 // (4) Process all letters 346 for (int n = 0; n < name.length() - 1; n++) { 347 char letter = name.charAt(n); 348 char nextLetter = name.charAt(n + 1); 349 350 // Create letter if it doesn't exist 351 HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letter); 352 if (wl == null) { 353 wl = new HashMap<>(); 354 letters.put(letter, wl); 355 } 356 ProbabilityTable<Character> wlg = wl.get(letter); 357 if (wlg == null) { 358 wlg = new ProbabilityTable<>(rng.getState()); 359 wl.put(letter, wlg); 360 } 361 wlg.add(nextLetter, 1); 362 363 // If letter was uppercase (beginning of name), also add a lowercase entry 364 if (Category.Lu.contains(letter)) { 365 letter = Character.toLowerCase(letter); 366 367 wlg = wl.get(letter); 368 if (wlg == null) { 369 wlg = new ProbabilityTable<>(rng.getState()); 370 wl.put(letter, wlg); 371 } 372 wlg.add(nextLetter, 1); 373 } 374 } 375 } 376 } 377 378 private StringBuilder generateInner(StringBuilder name) { 379 for (int runs = 0; runs < LAST_LETTER_CANDIDATES_MAX; runs++) { 380 name.setLength(0); 381 // Pick size 382 int size = rng.getRandomElement(sizes); 383 384 // Pick first letter 385 char latest = rng.getRandomElement(firstLetterSamples); 386 name.append(latest); 387 388 for (int i = 1; i < size - 2; i++) { 389 name.append(latest = getRandomNextLetter(latest)); 390 } 391 392 // Attempt to find a last letter 393 for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) { 394 char lastLetter = rng.getRandomElement(lastLetterSamples); 395 char intermediateLetterCandidate = getIntermediateLetter(latest, lastLetter); 396 397 // Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end) 398 if (Category.L.contains(intermediateLetterCandidate)) { 399 name.append(intermediateLetterCandidate).append(lastLetter); 400 break; 401 } 402 } 403 404 // Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher 405 if (validateGrouping(name) && checkLevenshtein(name)) { 406 return name; 407 } 408 } 409 name.setLength(0); 410 return name.append(rng.getRandomElement(names)); 411 } 412 /** 413 * Gets one random String name. 414 * 415 * @return a single random String name 416 */ 417 418 public String generate() { 419 return generateInner(new StringBuilder(32)).toString(); 420 } 421 422 /** 423 * Gets an ArrayList of random String names, sized to match amountToGenerate. 424 * @param amountToGenerate how many String items to include in the returned ArrayList 425 * @return an ArrayList of random String names 426 */ 427 public ArrayList<String> generateList(int amountToGenerate) { 428 ArrayList<String> result = new ArrayList<>(); 429 430 StringBuilder name = new StringBuilder(32); 431 for (int i = 0; i < amountToGenerate; i++) { 432 result.add(generateInner(name).toString()); 433 } 434 435 return result; 436 } 437 /** 438 * Gets an array of random String names, sized to match amountToGenerate. 439 * 440 * @param amountToGenerate how many String items to include in the returned array 441 * @return an array of random String names 442 */ 443 444 public String[] generate(int amountToGenerate) 445 { 446 return generateList(amountToGenerate).toArray(new String[0]); 447 } 448 449 /** 450 * Searches for the best fit letter between the letter before and the letter 451 * after (non-random). Used to determine penultimate letters in names. 452 * 453 * @param letterBefore The letter before the desired letter. 454 * @param letterAfter The letter after the desired letter. 455 * @return The best fit letter between the provided letters. 456 */ 457 private char getIntermediateLetter(char letterBefore, char letterAfter) { 458 if (Category.L.contains(letterBefore) && Category.L.contains(letterAfter)) { 459 // First grab all letters that come after the 'letterBefore' 460 HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letterBefore); 461 if (wl == null) { 462 return getRandomNextLetter(letterBefore); 463 } 464 Set<Character> letterCandidates = wl.get(letterBefore).items(); 465 466 char bestFitLetter = '\''; 467 int bestFitScore = 0; 468 469 // Step through candidates, and return best scoring letter 470 for (char letter : letterCandidates) { 471 wl = letters.get(letter); 472 if (wl == null) { 473 continue; 474 } 475 ProbabilityTable<Character> weightedLetterGroup = wl.get(letterBefore); 476 if (weightedLetterGroup != null) { 477 int letterCounter = weightedLetterGroup.weight(letterAfter); 478 if (letterCounter > bestFitScore) { 479 bestFitLetter = letter; 480 bestFitScore = letterCounter; 481 } 482 } 483 } 484 485 return bestFitLetter; 486 } else { 487 return '-'; 488 } 489 } 490 491 /** 492 * Checks that no three letters happen in succession. 493 * 494 * @param name The name CharSequence 495 * @return True if no triple letter sequence is found. 496 */ 497 private boolean validateGrouping(CharSequence name) { 498 for (int i = 2; i < name.length(); i++) { 499 if (name.charAt(i) == name.charAt(i - 1) && name.charAt(i) == name.charAt(i - 2)) { 500 return false; 501 } 502 } 503 int consonants = 0; 504 for (int i = 0; i < name.length(); i++) { 505 if (isVowel(name.charAt(i))) { 506 consonants = 0; 507 } else { 508 if (++consonants > consonantLimit) { 509 return false; 510 } 511 } 512 } 513 return true; 514 } 515 516 private boolean isVowel(char c) { 517 switch(c) 518 { 519 case 'a': 520 case 'e': 521 case 'i': 522 case 'o': 523 case 'u': 524 return true; 525 default: 526 return false; 527 } 528 } 529 530 /** 531 * Checks that the Damerau-Levenshtein distance of this name is within a 532 * given bias from a name on the master list. 533 * 534 * @param name The name string. 535 * @return True if a name is found that is within the bias. 536 */ 537 private boolean checkLevenshtein(CharSequence name) { 538 int levenshteinBias = name.length() / 2; 539 540 for (String name1 : names) { 541 int levenshteinDistance = dla.execute(name, name1); 542 if (levenshteinDistance <= levenshteinBias) { 543 return true; 544 } 545 } 546 547 return false; 548 } 549 550 private char getRandomNextLetter(char letter) { 551 if (letters.containsKey(letter)) { 552 return letters.get(letter).get(letter).random(); 553 } else { 554 return vowels[rng.next(2)]; // 2 bits, so ranging from 0 to 3 555 } 556 } 557}