001package squidpony;
002
003import regexodus.Category;
004import squidpony.squidmath.GWTRNG;
005import squidpony.squidmath.IStatefulRNG;
006import squidpony.squidmath.ProbabilityTable;
007
008import java.util.ArrayList;
009import java.util.HashMap;
010import java.util.Set;
011
012/**
013 * Based on work by Nolithius available at the following two sites
014 * https://github.com/Nolithius/weighted-letter-namegen
015 * http://code.google.com/p/weighted-letter-namegen/
016 *
017 * @see FakeLanguageGen FakeLanguageGen is meant for generating more than just names, and can imitate language styles.
018 * @author Eben Howard - http://squidpony.com - howard@squidpony.com
019 */
020public class WeightedLetterNamegen {
021//<editor-fold defaultstate="collapsed" desc="Viking Style static name list">
022
023    public static final String[] VIKING_STYLE_NAMES = new String[]{
024            "Andor",
025            "Baatar",
026            "Beowulf",
027            "Drogo",
028            "Freya",
029            "Grog",
030            "Gruumsh",
031            "Grunt",
032            "Hodor",
033            "Hrothgar",
034            "Hrun",
035            "Korg",
036            "Lothar",
037            "Odin",
038            "Theodrin",
039            "Thor",
040            "Yngvar",
041            "Xandor"
042    };
043    //</editor-fold>
044//<editor-fold defaultstate="collapsed" desc="Star Wars Style static name list">
045    public static final String[] STAR_WARS_STYLE_NAMES = new String[]{
046            "Lutoif Vap",
047            "Nasoi Seert",
048            "Jitpai",
049            "Sose",
050            "Vainau",
051            "Jairkau",
052            "Tirka Kist",
053            "Boush",
054            "Wofe",
055            "Voxin Voges",
056            "Koux Boiti",
057            "Loim",
058            "Gaungu",
059            "Mut Tep",
060            "Foimo Saispi",
061            "Toneeg Vaiba",
062            "Nix Nast",
063            "Gup Dangisp",
064            "Distark Toonausp",
065            "Tex Brinki",
066            "Kat Tosha",
067            "Tauna Foip",
068            "Frip Cex",
069            "Fexa Lun",
070            "Tafa",
071            "Zeesheerk",
072            "Cremoim Kixoop",
073            "Tago",
074            "Kesha Diplo"
075    };
076    //</editor-fold>
077//<editor-fold defaultstate="collapsed" desc="USA male names static name list">
078    public static final String[] COMMON_USA_MALE_NAMES = new String[]{
079            "James",
080            "John",
081            "Robert",
082            "Michael",
083            "William",
084            "David",
085            "Richard",
086            "Charles",
087            "Joseph",
088            "Tomas",
089            "Christopher",
090            "Daniel",
091            "Paul",
092            "Mark",
093            "Donald",
094            "George",
095            "Kenneth",
096            "Steven",
097            "Edward",
098            "Brian",
099            "Ronald",
100            "Anthony",
101            "Kevin",
102            "Jason",
103            "Matthew",
104            "Gary",
105            "Timothy",
106            "Jose",
107            "Larry",
108            "Jeffrey",
109            "Frank",
110            "Scott",
111            "Eric",
112            "Stephen",
113            "Andrew",
114            "Raymond",
115            "Gregory",
116            "Joshua",
117            "Jerry",
118            "Dennis",
119            "Walter",
120            "Patrick",
121            "Peter",
122            "Harold",
123            "Douglas",
124            "Henry",
125            "Carl",
126            "Arthur",
127            "Ryan",
128            "Roger"
129    };
130    //</editor-fold>
131//<editor-fold defaultstate="collapsed" desc="USA female names static name list">
132    public static final String[] COMMON_USA_FEMALE_NAMES = new String[]{
133            "Mary",
134            "Patricia",
135            "Linda",
136            "Barbara",
137            "Elizabeth",
138            "Jennifer",
139            "Maria",
140            "Susan",
141            "Margaret",
142            "Dorothy",
143            "Lisa",
144            "Nancy",
145            "Karen",
146            "Betty",
147            "Helen",
148            "Sandra",
149            "Donna",
150            "Carol",
151            "Ruth",
152            "Sharon",
153            "Michelle",
154            "Laura",
155            "Sarah",
156            "Kimberly",
157            "Deborah",
158            "Jessica",
159            "Shirley",
160            "Cynthia",
161            "Angela",
162            "Melissa",
163            "Brenda",
164            "Amy",
165            "Anna",
166            "Crystal",
167            "Virginia",
168            "Kathleen",
169            "Pamela",
170            "Martha",
171            "Becky",
172            "Amanda",
173            "Stephanie",
174            "Carolyn",
175            "Christine",
176            "Marie",
177            "Janet",
178            "Catherine",
179            "Frances",
180            "Ann",
181            "Joyce",
182            "Diane",
183            "Jane",
184            "Shauna",
185            "Trisha",
186            "Eileen",
187            "Danielle",
188            "Jacquelyn",
189            "Lynn",
190            "Hannah",
191            "Brittany"
192    };
193    //</editor-fold>
194//<editor-fold defaultstate="collapsed" desc="USA last names static name list">
195    public static final String[] COMMON_USA_LAST_NAMES = new String[]{
196            "Smith",
197            "Johnson",
198            "Williams",
199            "Brown",
200            "Jones",
201            "Miller",
202            "Davis",
203            "Wilson",
204            "Anderson",
205            "Taylor",
206            "Thomas",
207            "Moore",
208            "Martin",
209            "Jackson",
210            "Thompson",
211            "White",
212            "Clark",
213            "Lewis",
214            "Robinson",
215            "Walker",
216            "Willis",
217            "Carter",
218            "King",
219            "Lee",
220            "Grant",
221            "Howard",
222            "Morris",
223            "Bartlett",
224            "Paine",
225            "Wayne",
226            "Lorraine"
227    };
228//</editor-fold>
229
230    //<editor-fold defaultstate="collapsed" desc="Lovecraft Mythos style static name list">
231    public static final String[] LOVECRAFT_MYTHOS_NAMES = new String[]{
232            "Koth",
233            "Ghlatelilt",
234            "Siarlut",
235            "Nyogongogg",
236            "Nyialan",
237            "Nyithiark",
238            "Lyun",
239            "Kethoshigr",
240            "Shobik",
241            "Tekogr",
242            "Hru-yn",
243            "Lya-ehibos",
244            "Hruna-oma-ult",
245            "Shabo'en",
246            "Shrashangal",
247            "Shukhaniark",
248            "Thaghum",
249            "Shrilang",
250            "Lukhungu'ith",
251            "Nyun",
252            "Nyia-ongin",
253            "Shogia-usun",
254            "Lyu-yl",
255            "Liathiagragr",
256            "Lyathagg",
257            "Hri'osurkut",
258            "Shothegh",
259            "No-orleshigh",
260            "Zvriangekh",
261            "Nyesashiv",
262            "Lyarkio",
263            "Le'akh",
264            "Liashi-en",
265            "Shurkano'um",
266            "Hrakhanoth",
267            "Ghlotsuban",
268            "Cthitughias",
269            "Ftanugh"
270    };
271//</editor-fold>
272
273    private static final char[] vowels = {'a', 'e', 'i', 'o'};//not using y because it looks strange as a vowel in names
274    private static final int LAST_LETTER_CANDIDATES_MAX = 52;
275
276    private IStatefulRNG rng;
277    private String[] names;
278    private int consonantLimit;
279    private ArrayList<Integer> sizes;
280
281    private HashMap<Character, HashMap<Character, ProbabilityTable<Character>>> letters;
282    private ArrayList<Character> firstLetterSamples;
283    private ArrayList<Character> lastLetterSamples;
284    private DamerauLevenshteinAlgorithm dla = new DamerauLevenshteinAlgorithm(1, 1, 1, 1);
285
286    /**
287     * Creates the generator by seeding the provided list of names.
288     *
289     * @param names an array of Strings that are typical names to be emulated
290     */
291    public WeightedLetterNamegen(String[] names) {
292        this(names, 2);
293    }
294
295    /**
296     * Creates the generator by seeding the provided list of names.
297     *
298     * @param names          an array of Strings that are typical names to be emulated
299     * @param consonantLimit the maximum allowed consonants in a row
300     */
301    public WeightedLetterNamegen(String[] names, int consonantLimit) {
302        this(names, consonantLimit, new GWTRNG());
303    }
304
305    /**
306     * Creates the generator by seeding the provided list of names. The given RNG will be used for
307     * all random decisions this has to make, so if it has the same state (and RandomnessSource) on
308     * different runs through the program, it will produce the same names reliably.
309     *
310     * @param names          an array of Strings that are typical names to be emulated
311     * @param consonantLimit the maximum allowed consonants in a row
312     * @param rng            the source of randomness to be used
313     */
314    public WeightedLetterNamegen(String[] names, int consonantLimit, IStatefulRNG rng) {
315        this.names = names;
316        this.consonantLimit = consonantLimit;
317        this.rng = rng;
318        init();
319    }
320
321    /**
322     * Initialization, statistically measures letter likelihood.
323     */
324    private void init() {
325        sizes = new ArrayList<>();
326        letters = new HashMap<>();
327        firstLetterSamples = new ArrayList<>();
328        lastLetterSamples = new ArrayList<>();
329
330        for (int i = 0; i < names.length - 1; i++) {
331            String name = names[i];
332            if (name == null || name.length() < 1) {
333                continue;
334            }
335
336            // (1) Insert size
337            sizes.add(name.length());
338
339            // (2) Grab first letter
340            firstLetterSamples.add(name.charAt(0));
341
342            // (3) Grab last letter
343            lastLetterSamples.add(name.charAt(name.length() - 1));
344
345            // (4) Process all letters
346            for (int n = 0; n < name.length() - 1; n++) {
347                char letter = name.charAt(n);
348                char nextLetter = name.charAt(n + 1);
349
350                // Create letter if it doesn't exist
351                HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letter);
352                if (wl == null) {
353                    wl = new HashMap<>();
354                    letters.put(letter, wl);
355                }
356                ProbabilityTable<Character> wlg = wl.get(letter);
357                if (wlg == null) {
358                    wlg = new ProbabilityTable<>(rng.getState());
359                    wl.put(letter, wlg);
360                }
361                wlg.add(nextLetter, 1);
362
363                // If letter was uppercase (beginning of name), also add a lowercase entry
364                if (Category.Lu.contains(letter)) {
365                    letter = Character.toLowerCase(letter);
366
367                    wlg = wl.get(letter);
368                    if (wlg == null) {
369                        wlg = new ProbabilityTable<>(rng.getState());
370                        wl.put(letter, wlg);
371                    }
372                    wlg.add(nextLetter, 1);
373                }
374            }
375        }
376    }
377
378    private StringBuilder generateInner(StringBuilder name) {
379        for (int runs = 0; runs < LAST_LETTER_CANDIDATES_MAX; runs++) {
380            name.setLength(0);
381            // Pick size
382            int size = rng.getRandomElement(sizes);
383
384            // Pick first letter
385            char latest = rng.getRandomElement(firstLetterSamples);
386            name.append(latest);
387
388            for (int i = 1; i < size - 2; i++) {
389                name.append(latest = getRandomNextLetter(latest));
390            }
391
392            // Attempt to find a last letter
393            for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) {
394                char lastLetter = rng.getRandomElement(lastLetterSamples);
395                char intermediateLetterCandidate = getIntermediateLetter(latest, lastLetter);
396
397                // Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end)
398                if (Category.L.contains(intermediateLetterCandidate)) {
399                    name.append(intermediateLetterCandidate).append(lastLetter);
400                    break;
401                }
402            }
403
404            // Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher
405            if (validateGrouping(name) && checkLevenshtein(name)) {
406                return name;
407            }
408        }
409        name.setLength(0);
410        return name.append(rng.getRandomElement(names));
411    }
412    /**
413     * Gets one random String name.
414     *
415     * @return a single random String name
416     */
417
418    public String generate() {
419        return generateInner(new StringBuilder(32)).toString();
420    }
421
422    /**
423     * Gets an ArrayList of random String names, sized to match amountToGenerate.
424     * @param amountToGenerate how many String items to include in the returned ArrayList
425     * @return an ArrayList of random String names
426     */
427    public ArrayList<String> generateList(int amountToGenerate) {
428        ArrayList<String> result = new ArrayList<>();
429
430        StringBuilder name = new StringBuilder(32);
431        for (int i = 0; i < amountToGenerate; i++) {
432            result.add(generateInner(name).toString());
433        }
434
435        return result;
436    }
437    /**
438     * Gets an array of random String names, sized to match amountToGenerate.
439     *
440     * @param amountToGenerate how many String items to include in the returned array
441     * @return an array of random String names
442     */
443
444    public String[] generate(int amountToGenerate)
445    {
446        return generateList(amountToGenerate).toArray(new String[0]);
447    }
448
449    /**
450     * Searches for the best fit letter between the letter before and the letter
451     * after (non-random). Used to determine penultimate letters in names.
452     *
453     * @param   letterBefore    The letter before the desired letter.
454     * @param   letterAfter     The letter after the desired letter.
455     * @return  The best fit letter between the provided letters.
456     */
457    private char getIntermediateLetter(char letterBefore, char letterAfter) {
458        if (Category.L.contains(letterBefore) && Category.L.contains(letterAfter)) {
459            // First grab all letters that come after the 'letterBefore'
460            HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letterBefore);
461            if (wl == null) {
462                return getRandomNextLetter(letterBefore);
463            }
464            Set<Character> letterCandidates = wl.get(letterBefore).items();
465
466            char bestFitLetter = '\'';
467            int bestFitScore = 0;
468
469            // Step through candidates, and return best scoring letter
470            for (char letter : letterCandidates) {
471                wl = letters.get(letter);
472                if (wl == null) {
473                    continue;
474                }
475                ProbabilityTable<Character> weightedLetterGroup = wl.get(letterBefore);
476                if (weightedLetterGroup != null) {
477                    int letterCounter = weightedLetterGroup.weight(letterAfter);
478                    if (letterCounter > bestFitScore) {
479                        bestFitLetter = letter;
480                        bestFitScore = letterCounter;
481                    }
482                }
483            }
484
485            return bestFitLetter;
486        } else {
487            return '-';
488        }
489    }
490
491    /**
492     * Checks that no three letters happen in succession.
493     *
494     * @param   name    The name CharSequence
495     * @return  True if no triple letter sequence is found.
496     */
497    private boolean validateGrouping(CharSequence name) {
498        for (int i = 2; i < name.length(); i++) {
499            if (name.charAt(i) == name.charAt(i - 1) && name.charAt(i) == name.charAt(i - 2)) {
500                return false;
501            }
502        }
503        int consonants = 0;
504        for (int i = 0; i < name.length(); i++) {
505            if (isVowel(name.charAt(i))) {
506                consonants = 0;
507            } else {
508                if (++consonants > consonantLimit) {
509                    return false;
510                }
511            }
512        }
513        return true;
514    }
515
516    private boolean isVowel(char c) {
517        switch(c)
518        {
519            case 'a':
520            case 'e':
521            case 'i':
522            case 'o':
523            case 'u':
524                return true;
525            default:
526                return false;
527        }
528    }
529
530    /**
531     * Checks that the Damerau-Levenshtein distance of this name is within a
532     * given bias from a name on the master list.
533     *
534     * @param   name    The name string.
535     * @return  True if a name is found that is within the bias.
536     */
537    private boolean checkLevenshtein(CharSequence name) {
538        int levenshteinBias = name.length() / 2;
539
540        for (String name1 : names) {
541            int levenshteinDistance = dla.execute(name, name1);
542            if (levenshteinDistance <= levenshteinBias) {
543                return true;
544            }
545        }
546
547        return false;
548    }
549
550    private char getRandomNextLetter(char letter) {
551        if (letters.containsKey(letter)) {
552            return letters.get(letter).get(letter).random();
553        } else {
554            return vowels[rng.next(2)]; // 2 bits, so ranging from 0 to 3
555        }
556    }
557}