mirror of
https://github.com/ioistired/pleroma-ebooks.git
synced 2025-01-25 03:31:42 +01:00
added support for learning from CW'd posts
This commit is contained in:
parent
5dfd167bc0
commit
2cecfd42a5
@ -1,4 +1,5 @@
|
||||
{
|
||||
"site": "https://botsin.space",
|
||||
"cw": null
|
||||
"cw": null,
|
||||
"learn_from_cw": false
|
||||
}
|
@ -7,6 +7,8 @@ import markovify
|
||||
from bs4 import BeautifulSoup
|
||||
import re, multiprocessing, sqlite3, shutil, os, json
|
||||
|
||||
cfg = json.load(open('config.json'))
|
||||
|
||||
def make_sentence(output):
|
||||
class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences
|
||||
def test_sentence_input(self, sentence):
|
||||
@ -16,7 +18,10 @@ def make_sentence(output):
|
||||
db = sqlite3.connect("toots-copy.db")
|
||||
db.text_factory=str
|
||||
c = db.cursor()
|
||||
toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
if cfg['learn_from_cw']:
|
||||
toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
else:
|
||||
toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||
toots_str = ""
|
||||
for toot in toots:
|
||||
toots_str += "\n{}".format(toot[0])
|
||||
@ -32,7 +37,6 @@ def make_sentence(output):
|
||||
tries = tries + 1
|
||||
|
||||
sentence = re.sub("^(?:@\u202B[^ ]* )*", "", sentence) #remove leading pings (don't say "@bob blah blah" but still say "blah @bob blah")
|
||||
sentence = re.sub("^(?:@\u200B[^ ]* )*", "", sentence)
|
||||
|
||||
output.send(sentence)
|
||||
|
||||
|
17
main.py
17
main.py
@ -18,7 +18,8 @@ except:
|
||||
shutil.copy2("config.sample.json", "config.json")
|
||||
cfg = json.load(open('config.json', 'r'))
|
||||
|
||||
#config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults.
|
||||
#config.json should contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults.
|
||||
# TODO: this is pretty messy
|
||||
if 'site' not in cfg:
|
||||
cfg['website'] = "https://botsin.space"
|
||||
if 'cw' not in cfg:
|
||||
@ -28,6 +29,8 @@ if 'instance_blacklist' not in cfg:
|
||||
"bofa.lol",
|
||||
"witches.town"
|
||||
]
|
||||
if 'learn_from_cw' not in cfg:
|
||||
cfg['learn_from_cw'] = False
|
||||
|
||||
#if the user is using a (very!) old version that still uses the .secret files, migrate to the new method
|
||||
if os.path.exists("clientcred.secret"):
|
||||
@ -82,7 +85,11 @@ following = client.account_following(me.id)
|
||||
db = sqlite3.connect("toots.db")
|
||||
db.text_factory=str
|
||||
c = db.cursor()
|
||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
|
||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, cw INT NOT NULL DEFAULT 0, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
|
||||
try:
|
||||
c.execute("ALTER TABLE `toots` ADD COLUMN cw INT NOT NULL DEFAULT 0")
|
||||
except:
|
||||
pass # column already exists
|
||||
db.commit()
|
||||
|
||||
def handleCtrlC(signal, frame):
|
||||
@ -157,9 +164,6 @@ for f in following:
|
||||
|
||||
# its a toost baby
|
||||
content = oi['object']['content']
|
||||
if oi['object']['summary'] != None and oi['object']['summary'] != "":
|
||||
#don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this)
|
||||
continue
|
||||
toot = extract_toot(content)
|
||||
# print(toot)
|
||||
try:
|
||||
@ -170,8 +174,9 @@ for f in following:
|
||||
done = True
|
||||
break
|
||||
pid = patterns["pid"].search(oi['object']['id']).group(0)
|
||||
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", (
|
||||
c.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?)", (
|
||||
pid,
|
||||
1 if (oi['object']['summary'] != None and oi['object']['summary'] != "") else 0,
|
||||
f.id,
|
||||
oi['object']['id'],
|
||||
toot
|
||||
|
Loading…
x
Reference in New Issue
Block a user