flokati/modules/markov.go

282 lines
7.0 KiB
Go
Raw Normal View History

2016-11-01 17:34:05 +00:00
// vi:ts=4:sts=4:sw=4:noet:tw=72
package modules
// This Markov chain code is taken from the "Generating arbitrary text"
// codewalk: http://golang.org/doc/codewalk/markov/
//
// Minor modifications have been made to make it easier to integrate
// with a webserver and to save/load state
import (
"bufio"
"encoding/gob"
"flag"
2016-11-01 17:34:05 +00:00
"fmt"
"math/rand"
"os"
"regexp"
"strings"
"sync"
"time"
2017-12-15 08:59:28 +00:00
"git.dnix.de/an/xlog"
2016-11-01 17:34:05 +00:00
)
2019-08-01 13:12:58 +00:00
// markovPrefixLen points to the number of words that make up a chain
// prefix. The value is currently fixed at 3 instead of being read from
// a command line flag (the flag declaration is kept below, disabled),
// but it stays a *int so that it is dereferenced exactly like the
// flag-backed settings.
var markovPrefixLen = func() *int {
	n := 3
	return &n
}()

// Command line settings of the markov module.
var (
	//markovPrefixLen = flag.Int("markov_prefix_len", 3, "markov: prefix len")

	// markovAnswerLen is the maximum number of words generated per answer.
	markovAnswerLen = flag.Int("markov_answer_len", 10, "markov: answer len")
	// markovResponseChance is the chance, in percent, to answer a message.
	markovResponseChance = flag.Int("markov_response_chance", 10, "markov: chance to get an answer (percent)")
	// markovStateFile is the file the chain is loaded from and saved to.
	markovStateFile = flag.String("markov_state_file", "state.dat", "markov: state file")
	// markovTrainFile is a text file fed into the chain at startup;
	// "-" disables training.
	markovTrainFile = flag.String("markov_train_file", "train.txt", "markov: training file")
)
2016-11-01 17:34:05 +00:00
// markovChain is the chain shared by the message handler and the
// background save loop of this module. It stays nil until markovRun
// has created it.
var markovChain *MarkovChain

// init registers the module's run function and message handler under
// the key "markov" in the RunFuncs and MsgFuncs tables.
func init() {
	const moduleName = "markov"

	RunFuncs[moduleName] = markovRun
	MsgFuncs[moduleName] = markovHandleMessage
}
2017-06-28 21:28:39 +00:00
func markovHandleMessage(m *Message) {
text := m.Text
2016-11-01 17:34:05 +00:00
if text == "" {
return
}
text = markovParseText(text)
2017-06-28 11:08:23 +00:00
if rand.Intn(100) <= *markovResponseChance || strings.Index(text, BotNick) != -1 {
responseText := markovChain.Generate(*markovAnswerLen, text)
2016-11-01 17:34:05 +00:00
if responseText != "" {
go func() {
time.Sleep(time.Duration(rand.Intn(8)+2) * time.Second)
2017-06-28 21:28:39 +00:00
SayCh <- m.Channel + "\n" + responseText
2016-11-01 17:34:05 +00:00
}()
}
}
markovChain.Write(text)
2016-11-01 17:34:05 +00:00
}
func markovRun() {
markovChain = markovNewChain(*markovPrefixLen)
err := markovChain.Load(*markovStateFile)
if err != nil {
xlog.Error(err.Error())
}
filepath := *markovTrainFile
2016-11-01 17:34:05 +00:00
if filepath != "-" {
file, _ := os.Open(filepath)
scanner := bufio.NewScanner(file)
for scanner.Scan() {
text := scanner.Text()
text = markovParseText(text)
if text != "" {
markovChain.Write(text)
}
}
}
go func() {
for {
time.Sleep(60 * time.Second)
markovChain.Save(*markovStateFile)
2016-11-01 17:34:05 +00:00
}
}()
}
// markovMessageRegex matches markup enclosed in angle brackets, such as
// <http://example.com>, <@U123|name> or <#C123|channel>. It is compiled
// once at package level instead of on every call.
var markovMessageRegex = regexp.MustCompile(`<([^>]+)>`)

// markovTextReplacer decodes the escaped characters &lt;, &gt; and
// &amp; and turns commas into spaces in a single pass. Replaced text is
// not scanned again, so "&amp;lt;" becomes "&lt;" and not "<".
var markovTextReplacer = strings.NewReplacer(
	"&lt;", "<",
	"&gt;", ">",
	"&amp;", "&",
	",", " ",
)

// markovParseText normalises a raw message for the Markov chain.
//
// Angle-bracket markup is rewritten as follows:
//   - <http...> and <mailto...> are removed,
//   - <@U...|label> becomes "@label"; without a label it is removed,
//   - any other <@...> loses only its brackets,
//   - <#...|label> becomes "#label"; without a label it is removed,
//   - everything else in angle brackets is left untouched.
//
// Afterwards &lt;, &gt; and &amp; are decoded, commas are replaced by
// spaces, surrounding whitespace is trimmed and the text is
// lower-cased. Trimming happens last so that a leading or trailing
// comma does not leave whitespace behind.
func markovParseText(text string) string {
	for _, match := range markovMessageRegex.FindAllStringSubmatch(text, -1) {
		markup, inner := match[0], match[1]

		var replacement string
		switch {
		case strings.HasPrefix(inner, "http"), strings.HasPrefix(inner, "mailto"):
			replacement = ""
		case strings.HasPrefix(inner, "@U"):
			replacement = markovMarkupLabel("@", inner)
		case strings.HasPrefix(inner, "@"):
			replacement = inner
		case strings.HasPrefix(inner, "#"):
			replacement = markovMarkupLabel("#", inner)
		default:
			// Unknown markup stays in the text as it is.
			continue
		}
		text = strings.Replace(text, markup, replacement, -1)
	}

	text = markovTextReplacer.Replace(text)
	return strings.ToLower(strings.TrimSpace(text))
}

// markovMarkupLabel returns sigil followed by the part of inner after
// the first "|", or the empty string if inner contains no "|".
func markovMarkupLabel(sigil, inner string) string {
	i := strings.Index(inner, "|")
	if i < 0 {
		return ""
	}
	return sigil + inner[i+1:]
}
// MarkovPrefix is a Markov chain prefix of one or more words.
type MarkovPrefix []string

// String returns the prefix as a string (for use as a map key): the
// words joined by single spaces, with leading and trailing spaces
// removed. Slots that are still empty at the front of the prefix
// therefore do not show up in the key.
func (p MarkovPrefix) String() string {
	return strings.Trim(strings.Join(p, " "), " ")
}

// Shift removes the first word from the prefix and appends the given
// word. The prefix is modified in place and keeps its length. Shifting
// an empty prefix is a no-op, as there is no slot to store the word in.
func (p MarkovPrefix) Shift(word string) {
	if len(p) == 0 {
		return
	}
	copy(p, p[1:])
	p[len(p)-1] = word
}
// MarkovChain contains a map ("chain") of prefixes to a list of suffixes.
// A prefix is a string of up to prefixLen words joined with spaces.
// A suffix is a single word. A prefix can have multiple suffixes.
type MarkovChain struct {
	// MarkovChain maps a prefix key to the suffixes seen after it. It
	// is the only exported field of the struct.
	MarkovChain map[string][]string
	// prefixLen is the number of words in a prefix.
	prefixLen int
	// mu guards MarkovChain.
	mu sync.Mutex
}

// markovNewChain returns a new, empty MarkovChain with prefixes of
// prefixLen words. A prefixLen below 1 is raised to 1, because a
// prefix needs at least one slot to be shifted.
func markovNewChain(prefixLen int) *MarkovChain {
	if prefixLen < 1 {
		prefixLen = 1
	}
	return &MarkovChain{
		MarkovChain: make(map[string][]string),
		prefixLen:   prefixLen,
	}
}
// Write parses the bytes into prefixes and suffixes that are stored in MarkovChain.
func (c *MarkovChain) Write(in string) (int, error) {
in = strings.ToLower(in)
2017-06-28 11:08:23 +00:00
if strings.HasPrefix(in, BotNick) {
tok := strings.Split(in, " ")
in = strings.Replace(in, tok[0]+" ", "", 1)
}
2016-11-01 17:34:05 +00:00
sr := strings.NewReader(in)
p := make(MarkovPrefix, c.prefixLen)
for {
var s string
if _, err := fmt.Fscan(sr, &s); err != nil {
break
}
key := p.String()
c.mu.Lock()
c.MarkovChain[key] = append(c.MarkovChain[key], s)
c.mu.Unlock()
xlog.Debug("Chain len: %d, learned [%s] [%s]", len(c.MarkovChain), key, s)
2016-11-01 17:34:05 +00:00
p.Shift(s)
}
return len(in), nil
}
// Generate returns a string of at most n words generated from MarkovChain.
func (c *MarkovChain) Generate(n int, in string) string {
in = strings.ToLower(in)
2017-06-28 11:08:23 +00:00
if strings.HasPrefix(in, BotNick) {
tok := strings.Split(in, " ")
in = strings.Replace(in, tok[0]+" ", "", 1)
}
2016-11-01 17:34:05 +00:00
c.mu.Lock()
defer c.mu.Unlock()
2016-11-26 23:34:25 +00:00
var p MarkovPrefix
2016-11-01 17:34:05 +00:00
var words []string
2016-11-26 23:34:25 +00:00
var start string
for attempt := 0; attempt < 10; attempt++ {
/*
p = make(MarkovPrefix, c.prefixLen)
p = strings.Split(in, " ")
if len(p) > c.prefixLen {
i := rand.Intn(len(p) - 2)
p = p[i : i+c.prefixLen]
}
*/
p = make(MarkovPrefix, 1)
inWords := strings.Split(in, " ")
start = inWords[rand.Intn(len(inWords))]
p[0] = start
//ss = p.String()
xlog.Debug("Looking for answer on [%s]", start)
for i := 0; i < n; i++ {
choices := c.MarkovChain[p.String()]
if len(choices) == 0 {
break
}
next := choices[rand.Intn(len(choices))]
words = append(words, next)
if strings.HasSuffix(next, ".") || strings.HasSuffix(next, "!") || strings.HasSuffix(next, "?") {
break
}
p.Shift(next)
2016-11-01 17:34:05 +00:00
}
2016-11-26 23:34:25 +00:00
if len(words) > 0 {
break
}
2016-11-01 17:34:05 +00:00
}
2016-11-26 23:34:25 +00:00
start = strings.Trim(start, " ")
2016-11-01 17:34:05 +00:00
if len(words) == 0 {
xlog.Debug("No answer found")
2016-11-26 23:34:25 +00:00
return start + " ... pfrrrz"
2016-11-01 17:34:05 +00:00
} else {
xlog.Debug("Found words: [%s]", strings.Join(words, " "))
2016-11-26 23:34:25 +00:00
return start + " " + strings.Join(words, " ")
2016-11-01 17:34:05 +00:00
}
}
// Save gob-encodes the chain and stores it in the file fileName.
//
// The data is written to fileName+".tmp" first and renamed to fileName
// only after encoding and closing succeeded, so a failed save does not
// truncate an existing state file. Any error from creating, encoding,
// closing or renaming is returned; on error the temporary file is
// removed again.
func (c *MarkovChain) Save(fileName string) error {
	tmpName := fileName + ".tmp"
	fo, err := os.Create(tmpName)
	if err != nil {
		return err
	}

	// Hold the lock only while the map is being read by the encoder.
	c.mu.Lock()
	err = gob.NewEncoder(fo).Encode(c)
	c.mu.Unlock()

	// Always close the file, but report the encode error first if
	// both failed.
	if closeErr := fo.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpName, fileName)
	}
	if err != nil {
		// Best effort clean-up; the original error is what matters.
		os.Remove(tmpName)
		return err
	}
	return nil
}
// Load reads a gob-encoded chain from the file fileName and decodes it
// into c. It returns the error from opening or decoding the file, if
// any. After Load the suffix map of c is never nil, even if decoding
// failed or the file contained no map.
func (c *MarkovChain) Load(fileName string) error {
	fi, err := os.Open(fileName)
	if err != nil {
		return err
	}
	// The file is only read, so a failing Close cannot lose data and
	// is no reason to abort the program.
	defer fi.Close()

	c.mu.Lock()
	defer c.mu.Unlock()

	err = gob.NewDecoder(fi).Decode(c)
	if c.MarkovChain == nil {
		// Writing to a nil map panics; keep the chain usable.
		c.MarkovChain = make(map[string][]string)
	}
	return err
}