-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.go
More file actions
129 lines (113 loc) · 2.75 KB
/
main.go
File metadata and controls
129 lines (113 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package main
import (
"bufio"
"fmt"
"math/rand"
"os"
"strings"
"time"
)
// source_file is the path of the text file that get_source_data opens and
// reads line by line.
var source_file = "./source.txt"
// get_source_data reads source_file and returns its contents as a slice with
// one element per line, in file order.
//
// It panics if the file cannot be opened, or if reading fails part-way through
// (for example on an I/O error or a line that exceeds bufio.Scanner's token
// size limit).
func get_source_data() (data []string) {
	f, err := os.Open(source_file)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		data = append(data, scanner.Text())
	}
	// Scan returns false both at end of input and on failure; only Err tells
	// the two apart. Without this check a read error would silently produce a
	// truncated list.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return data
}
// generate_name produces one random name by walking the Markov chain described
// by analysis.
//
// analysis maps a character to the probability distribution of the character
// that follows it: analysis[c][next] is the probability of next coming after
// c. The walk starts in the pseudo-state '_' and finishes when the terminator
// '.' is drawn; the terminator is not part of the returned name.
//
// If the walk reaches a character with no outgoing transitions (including a
// missing '_' entry, e.g. when the table is empty or nil), the name built so
// far is returned instead of spinning forever.
func generate_name(analysis map[rune]map[rune]float64) (name string) {
	var b strings.Builder
	char := '_'
	for {
		transitions := analysis[char]
		if len(transitions) == 0 {
			// Dead end: nothing could ever be selected from this state.
			return b.String()
		}

		// Roulette-wheel selection: accumulate probabilities until the running
		// total reaches the random threshold d. Map iteration order is
		// unspecified, but that does not bias the draw: for any ordering, each
		// key owns a slice of [0, 1) whose width equals its probability, and d
		// is drawn independently of the ordering.
		d := rand.Float64()
		pos := 0.0
		for x, p := range transitions {
			pos += p
			if pos < d {
				continue
			}
			if x == '.' {
				return b.String()
			}
			b.WriteRune(x)
			char = x
			break
		}
		// If rounding left the cumulative total just below d, nothing was
		// picked; the outer loop simply draws again from the same character.
	}
}
// visualise dumps the transition table to standard output for debugging.
// For every character that has at least one outgoing transition it prints the
// character on its own line, followed by one line per successor showing the
// value stored for that successor. Rows and entries are printed in map
// iteration order, so the ordering can differ between runs.
//
// Marked as unused by the original author.
func visualise(analysis map[rune]map[rune]float64) {
	for from, successors := range analysis {
		if len(successors) == 0 {
			continue
		}
		fmt.Println(string(from))
		for to, weight := range successors {
			fmt.Println(" ", string(to), " => ", weight)
		}
	}
}
// count_transitions tallies, for every character in the given names, how often
// each character (or the terminator '.') follows it. The pseudo-character '_'
// stands for "start of name", so counts['_'][c] is how many names begin with c.
//
// Names are trimmed and lower-cased first, and blank lines are skipped. Inner
// maps are created on demand, so characters outside a-z (spaces, hyphens,
// accented letters, ...) are counted like any other instead of causing a
// write to a nil map. Names that themselves contain '_' or '.' would collide
// with the two sentinel characters.
func count_transitions(data []string) map[rune]map[rune]float64 {
	counts := make(map[rune]map[rune]float64)
	bump := func(from, to rune) {
		row, ok := counts[from]
		if !ok {
			row = make(map[rune]float64)
			counts[from] = row
		}
		row[to]++
	}

	for _, line := range data {
		// Work on runes so multi-byte characters are never split.
		letters := []rune(strings.ToLower(strings.TrimSpace(line)))
		if len(letters) == 0 {
			continue
		}
		prev := '_'
		for _, c := range letters {
			bump(prev, c)
			prev = c
		}
		bump(prev, '.')
	}
	return counts
}

// normalise_counts converts every row of raw counts into probabilities, in
// place, by dividing each entry by its row total (so each row sums to 1 up to
// floating-point rounding).
func normalise_counts(analysis map[rune]map[rune]float64) {
	for _, row := range analysis {
		total := 0.0
		for _, n := range row {
			total += n
		}
		for x, n := range row {
			row[x] = n / total
		}
	}
}

// main builds a character-level Markov chain from the names returned by
// get_source_data and prints ten generated names, each with its first letter
// upper-cased. It exits with status 1 if the source contains no usable names
// or if no name longer than one letter can be generated.
func main() {
	analysis := count_transitions(get_source_data())
	if len(analysis['_']) == 0 {
		// Without a start state there is nothing to generate from.
		fmt.Fprintln(os.Stderr, "no names found in the source data")
		os.Exit(1)
	}
	normalise_counts(analysis)

	// rand.Seed is deprecated since Go 1.20, where the global source is seeded
	// randomly by default; it is kept so that older toolchains do not print
	// the same names on every run.
	rand.Seed(time.Now().UnixNano())

	const wanted = 10
	// Upper bound on retries per name, so a source made up solely of
	// one-letter names cannot hang the program.
	const attempts = 1000

	for i := 0; i < wanted; i++ {
		// A one-letter result is legitimate chain output: whenever some source
		// name ends with a letter that can also start a name, the walk can go
		// '_' -> letter -> '.'. Such results are discarded and redrawn.
		var letters []rune
		for try := 0; try < attempts && len(letters) < 2; try++ {
			letters = []rune(generate_name(analysis))
		}
		if len(letters) < 2 {
			fmt.Fprintln(os.Stderr, "could not generate a name longer than one letter")
			os.Exit(1)
		}
		// Upper-case the first letter only; this replaces the deprecated
		// strings.Title and gives the same result for single-word names.
		fmt.Println(strings.ToUpper(string(letters[:1])) + string(letters[1:]))
	}
}