initial
This commit is contained in:
65
Generate_Model.py
Normal file
65
Generate_Model.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
def get_token(file):
    """Yield word and punctuation tokens from an iterable of text lines.

    Words are maximal runs of characters that are neither whitespace nor
    one of the punctuation characters below; each punctuation character
    is emitted as its own single-character token.
    """
    punct_tokens = '",.!?;:-_'
    word = ""
    for line in file:
        for c in line:
            if c.isspace():
                if word != "": yield word
                word = ""
                continue
            if c in punct_tokens:
                # Bug fix: only flush a non-empty word; previously an empty
                # string token was yielded whenever punctuation followed
                # whitespace or another punctuation character.
                if word != "": yield word
                yield c
                word = ""
            else:
                word += c
    # Bug fix: flush the trailing word when the input does not end in
    # whitespace; previously the file's last word was silently dropped.
    if word != "": yield word
|
||||||
|
|
||||||
|
def update_model(model, file):
    """Accumulate trigram counts from *file* into the nested dict *model*.

    For every consecutive token triple (former, current, prior) produced by
    get_token, increments "_count" at each of the three nesting levels:
    model[former], model[former][current], model[former][current][prior].
    Mutates *model* in place; inputs with fewer than three tokens add nothing.
    """
    token_stream = get_token(file)
    try:
        former = next(token_stream)
        current = next(token_stream)
    except StopIteration:
        # Bug fix: fewer than two tokens used to raise an uncaught
        # StopIteration; now there is simply nothing to count.
        return
    # Bug fix: iterate the generator directly instead of a `while True`
    # with a bare `except:` that also swallowed SystemExit/KeyboardInterrupt.
    for prior in token_stream:
        if former not in model:
            model[former] = { "_count": 1 }
        else:
            model[former]["_count"] += 1
        if current not in model[former]:
            model[former][current] = { "_count": 1 }
        else:
            model[former][current]["_count"] += 1
        if prior not in model[former][current]:
            model[former][current][prior] = { "_count": 1 }
        else:
            model[former][current][prior]["_count"] += 1
        former = current
        current = prior
|
||||||
|
|
||||||
|
def main(args):
    """Build (or extend) a trigram model from args.input and dump it to args.out.

    args.input may be a single file or a directory of files; args.model,
    when non-empty, names an existing JSON model to continue training.
    """
    # Start from an existing model when one was given, else from scratch.
    model = {}
    if args.model:
        with open(args.model) as m_file:
            model = json.load(m_file)

    # A directory input means "train on every file directly inside it".
    if os.path.isdir(args.input):
        files = [f"{args.input}/{entry}" for entry in os.listdir(args.input)]
    else:
        files = [args.input]

    for f_name in files:
        with open(f_name, 'r', encoding='utf-8') as file:
            update_model(model, file)

    with open(args.out, "w") as m_file:
        json.dump(model, m_file)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI: positional input path (file or directory), optional starting
    # model (-m/--model) and output path (-o/--out, default out.json).
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input')
    arg_parser.add_argument('-m', '--model', default='')
    arg_parser.add_argument('-o', '--out', default='out.json')
    main(arg_parser.parse_args())
|
||||||
41
Markov.py
Normal file
41
Markov.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import random
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def select(model: dict):
    """Pick one key of *model* at random, weighted by each child's "_count".

    The bookkeeping "_count" entry of *model* itself is never a candidate.
    """
    candidates = [(key, sub) for key, sub in model.items() if key != "_count"]
    tokens = [key for key, _ in candidates]
    weights = [sub["_count"] for _, sub in candidates]
    return random.choices(tokens, weights, k=1)[0]
|
||||||
|
|
||||||
|
def generate_token(model, former, current):
    """Sample the next token given the two preceding tokens.

    Backoff scheme: with probability 0.90 try the trigram context
    model[former][current]; up to 0.98 fall back to the bigram context
    model[current]; otherwise (or whenever a context is missing from the
    model) fall back to the unigram distribution over the whole model.
    """
    p = random.random()
    if p <= 0.90:
        try:
            return select(model[former][current])
        # Bug fix: narrowed from a bare `except:` that also swallowed
        # SystemExit/KeyboardInterrupt. KeyError: context never seen in
        # training; IndexError/ValueError: select() found no weighted
        # candidates under this context.
        except (KeyError, IndexError, ValueError):
            pass
    if p <= 0.98:
        try:
            return select(model[current])
        except (KeyError, IndexError, ValueError):
            pass
    return select(model)
|
||||||
|
|
||||||
|
def main(args):
    """Load a trigram model from args.model and print args.size generated tokens."""
    with open(args.model) as m_file:
        model = json.load(m_file)

    # Seed the generator with an opening bigram drawn from the model.
    former = select(model)
    current = select(model[former])
    print(f"{former} {current} ", end="")

    # Two tokens are already out; emit the remainder one at a time,
    # sliding the (former, current) context window forward each step.
    for _ in range(args.size - 2):
        prior = generate_token(model, former, current)
        print(prior + " ", end="")
        former, current = current, prior
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI: positional model path (JSON), optional output length (-s/--size).
    cli = argparse.ArgumentParser()
    cli.add_argument('model')
    cli.add_argument('-s', '--size', default=100, type=int)
    main(cli.parse_args())
|
||||||
13
helpy.py
Normal file
13
helpy.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import wx
|
||||||
|
|
||||||
|
class MainWindow(wx.Frame):
    """Top-level application window holding one multi-line text control."""

    def __init__(self, parent, title):
        # Fixed initial window size of 200x100 pixels.
        wx.Frame.__init__(self, parent, title=title, size=(200,100))
        # The editor area fills the frame's client region by default.
        self.control = wx.TextCtrl(self, style=wx.TE_MULTILINE)
        self.CreateStatusBar() # A Statusbar in the bottom of the window

        # Make the frame visible immediately on construction.
        self.Show(True)
|
||||||
|
|
||||||
|
# Create the wx application (False: do not redirect stdout/stderr to a
# wx window), build the editor frame, and enter the GUI event loop.
app = wx.App(False)
frame = MainWindow(None, "Sample editor")
app.MainLoop()
|
||||||
BIN
potter_sample.txt
Normal file
BIN
potter_sample.txt
Normal file
Binary file not shown.
0
simple.txt
Normal file
0
simple.txt
Normal file
1
test.json
Normal file
1
test.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"Hello": {"_count": 3, ",": {"_count": 2, "this": {"_count": 2}}, "this": {"_count": 1, "is": {"_count": 1}}}, ",": {"_count": 3, "this": {"_count": 3, "is": {"_count": 3}}}, "this": {"_count": 4, "is": {"_count": 4, "Liz": {"_count": 3}, ",": {"_count": 1}}}, "is": {"_count": 5, "Liz": {"_count": 3, "Miller": {"_count": 3}}, "a": {"_count": 1, "small": {"_count": 1}}, ",": {"_count": 1, "this": {"_count": 1}}}, "Liz": {"_count": 3, "Miller": {"_count": 3, ".": {"_count": 2}, "This": {"_count": 1}}}, "Miller": {"_count": 3, ".": {"_count": 2, "This": {"_count": 1}, "Hello": {"_count": 1}}, "This": {"_count": 1, "is": {"_count": 1}}}, ".": {"_count": 3, "This": {"_count": 1, "is": {"_count": 1}}, "Hello": {"_count": 2, "this": {"_count": 1}, ",": {"_count": 1}}}, "This": {"_count": 2, "is": {"_count": 2, "a": {"_count": 2}}}, "a": {"_count": 2, "small": {"_count": 1, "test": {"_count": 1}}, "trigram": {"_count": 1, "markov": {"_count": 1}}}, "small": {"_count": 1, "test": {"_count": 1, "text": {"_count": 1}}}, "test": {"_count": 1, "text": {"_count": 1, "from": {"_count": 1}}}, "text": {"_count": 1, "from": {"_count": 1, "which": {"_count": 1}}}, "from": {"_count": 1, "which": {"_count": 1, "to": {"_count": 1}}}, "which": {"_count": 1, "to": {"_count": 1, "generate": {"_count": 1}}}, "to": {"_count": 1, "generate": {"_count": 1, "a": {"_count": 1}}}, "generate": {"_count": 1, "a": {"_count": 1, "trigram": {"_count": 1}}}, "trigram": {"_count": 1, "markov": {"_count": 1, "language": {"_count": 1}}}, "markov": {"_count": 1, "language": {"_count": 1, "model": {"_count": 1}}}, "language": {"_count": 1, "model": {"_count": 1, ".": {"_count": 1}}}, "model": {"_count": 1, ".": {"_count": 1, "Hello": {"_count": 1}}}}
|
||||||
2
test.txt
Normal file
2
test.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
Hello, this is Liz Miller. This is a small test text from which to generate a trigram markov language model.
|
||||||
|
Hello this is, this is Liz Miller. Hello, this is Liz Miller This is a small
|
||||||
22748
text/1.txt
Normal file
22748
text/1.txt
Normal file
File diff suppressed because it is too large
Load Diff
9106
text/2.txt
Normal file
9106
text/2.txt
Normal file
File diff suppressed because it is too large
Load Diff
4265
text/3.txt
Normal file
4265
text/3.txt
Normal file
File diff suppressed because it is too large
Load Diff
22310
text/4.txt
Normal file
22310
text/4.txt
Normal file
File diff suppressed because it is too large
Load Diff
12724
text/5.txt
Normal file
12724
text/5.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
text/Harry_Potter_all_books_preprocessed.txt
Normal file
1
text/Harry_Potter_all_books_preprocessed.txt
Normal file
File diff suppressed because one or more lines are too long
7317
text/frank.txt
Normal file
7317
text/frank.txt
Normal file
File diff suppressed because it is too large
Load Diff
33214
text/ulysses.txt
Normal file
33214
text/ulysses.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user