Mirror of https://github.com/ioistired/pleroma-ebooks.git (synced 2024-11-20 02:14:52 +01:00)
Merge branch 'master' into master
This commit is contained in commit 64b49da4eb.
.gitignore: 3 changed lines

@@ -7,3 +7,6 @@ toots.db
toots.db-journal
toots.db-wal
__pycache__/*
+.vscode/
+.editorconfig
+.*.swp

config.sample.json

@@ -1 +1,4 @@
-{"site":"https://botsin.space","cw":null}
+{
+	"site": "https://botsin.space",
+	"cw": null
+}
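
For reference, a minimal sketch (not part of the diff) of how a bot script can read this sample config; the fall-back copy mirrors the behaviour main.py adds further down, and the file names are the ones used in this commit:

import json, os, shutil

# start from the sample config if the real one doesn't exist yet
if not os.path.exists("config.json"):
	shutil.copy2("config.sample.json", "config.json")

with open("config.json", "r") as f:
	cfg = json.load(f)

site = cfg.get("site", "https://botsin.space")  # instance URL
cw = cfg.get("cw")                              # content warning text, or None for no CW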

create.py: 62 changed lines (file deleted)

@@ -1,62 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import markovify
import json
import re, random, multiprocessing, time, sqlite3, shutil, os

def make_sentence(output):
	class nlt_fixed(markovify.NewlineText):
		def test_sentence_input(self, sentence):
			return True #all sentences are valid <3

	# with open("corpus.txt", encoding="utf-8") as fp:
	# 	model = nlt_fixed(fp.read())

	shutil.copyfile("toots.db", "toots-copy.db")
	db = sqlite3.connect("toots-copy.db")
	db.text_factory=str
	c = db.cursor()
	toots = c.execute("SELECT content FROM `toots`").fetchall()
	toots_str = ""
	for toot in toots:
		toots_str += "\n{}".format(toot[0])
	model = nlt_fixed(toots_str)
	toots_str = None
	db.close()
	os.remove("toots-copy.db")

	sentence = None
	tries = 0
	while sentence is None and tries < 10:
		sentence = model.make_short_sentence(500, tries=10000)
		tries = tries + 1
	sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
	output.send(sentence)

def make_toot(force_markov = False, args = None):
	return make_toot_markov()

def make_toot_markov(query = None):
	tries = 0
	toot = None
	while toot == None and tries < 25:
		pin, pout = multiprocessing.Pipe(False)
		p = multiprocessing.Process(target = make_sentence, args = [pout])
		p.start()
		p.join(10)
		if p.is_alive():
			p.terminate()
			p.join()
			toot = None
			tries = tries + 1
		else:
			toot = pin.recv()
	if toot == None:
		toot = "Toot generation failed! Contact Lynne for assistance."
	return {
		"toot":toot,
		"media":None
	}
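
The sentence generation above runs markovify in a child process and reads the result through a one-way pipe, so a hung generation can be killed after a timeout. A standalone sketch of that pattern (the worker below is a hypothetical stand-in for make_sentence, not code from the repo):

import multiprocessing

def worker(output):
	# hypothetical stand-in for make_sentence(); sends one result down the pipe
	output.send("example sentence")

def generate_with_timeout(retries=10, timeout=10):
	result = None
	tries = 0
	while result is None and tries < retries:
		pin, pout = multiprocessing.Pipe(False)  # one-way pipe: parent reads from pin, child writes to pout
		p = multiprocessing.Process(target=worker, args=[pout])
		p.start()
		p.join(timeout)       # wait at most `timeout` seconds for the child to finish
		if p.is_alive():      # the child hung; kill it and try again
			p.terminate()
			p.join()
			tries += 1
		else:
			result = pin.recv()
	return result

if __name__ == "__main__":
	print(generate_with_timeout())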

functions.py: 87 changed lines (new executable file)

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import markovify
from bs4 import BeautifulSoup
import re, multiprocessing, sqlite3, shutil, os, json

def make_sentence(output):
	class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences
		def test_sentence_input(self, sentence):
			return True #all sentences are valid <3

	shutil.copyfile("toots.db", "toots-copy.db") #create a copy of the database because reply.py will be using the main one
	db = sqlite3.connect("toots-copy.db")
	db.text_factory=str
	c = db.cursor()
	toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
	toots_str = ""
	for toot in toots:
		toots_str += "\n{}".format(toot[0])
	model = nlt_fixed(toots_str)
	toots_str = None
	db.close()
	os.remove("toots-copy.db")

	sentence = None
	tries = 0
	while sentence is None and tries < 10:
		sentence = model.make_short_sentence(500, tries=10000)
		tries = tries + 1

	sentence = re.sub("^(?:@\u202B[^ ]* )*", "", sentence) #remove leading pings (don't say "@bob blah blah" but still say "blah @bob blah")
	sentence = re.sub("^(?:@\u200B[^ ]* )*", "", sentence)

	output.send(sentence)

def make_toot(force_markov = False, args = None):
	return make_toot_markov()

def make_toot_markov(query = None):
	tries = 0
	toot = None
	while toot == None and tries < 10: #try to make a toot 10 times
		pin, pout = multiprocessing.Pipe(False)
		p = multiprocessing.Process(target = make_sentence, args = [pout])
		p.start()
		p.join(10) #wait 10 seconds to get something
		if p.is_alive(): #if it's still trying to make a toot after 10 seconds
			p.terminate()
			p.join()
			toot = None
			tries = tries + 1 #give up, and increment tries by one
		else:
			toot = pin.recv()
	if toot == None: #if we've tried and failed ten times, just give up
		toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance."
	return {
		"toot": toot,
		"media": None
	}

def extract_toot(toot):
	toot = toot.replace("&apos;", "'") #convert HTML stuff to normal stuff
	toot = toot.replace("&quot;", '"') #ditto
	soup = BeautifulSoup(toot, "html.parser")
	for lb in soup.select("br"): #replace <br> with linebreak
		lb.insert_after("\n")
		lb.decompose()

	for p in soup.select("p"): #ditto for <p>
		p.insert_after("\n")
		p.unwrap()

	for ht in soup.select("a.hashtag"): #make hashtags no longer links, just text
		ht.unwrap()

	for link in soup.select("a"): #convert <a href='https://example.com'>example.com</a> to just https://example.com
		link.insert_after(link["href"])
		link.decompose()

	text = soup.get_text()
	text = re.sub("https://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mastodon-style mentions back in
	text = re.sub("https://([^/]+)/users/([^ ]+)", r"@\2@\1", text) #put pleroma-style mentions back in
	text = text.rstrip("\n") #remove trailing newline
	return text
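
A usage sketch (not part of the diff) of the extract_toot() helper added above, run against an invented HTML status; it needs beautifulsoup4 installed and must be run from the repository directory so that functions can be imported:

import functions

html = (
	'<p><span class="h-card"><a href="https://example.social/@alice" class="u-url mention">'
	'@<span>alice</span></a></span> hello<br>world '
	'<a href="https://example.social/tags/bots" class="mention hashtag" rel="tag">#<span>bots</span></a></p>'
)

print(functions.extract_toot(html))
# expected output, roughly:
# @alice@example.social hello
# world #bots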

gen.py: 13 changed lines

@@ -5,13 +5,11 @@

from mastodon import Mastodon
import argparse, sys, traceback, json
-import create
+import functions

parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
	help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
-	help="Print the toot to stdout without posting it")
+	help="Print the toot without actually posting it. Use this to make sure your bot's actually working.")

args = parser.parse_args()

@@ -24,7 +22,7 @@ client = Mastodon(
	api_base_url=cfg['site'])

if __name__ == '__main__':
-	toot = create.make_toot()
+	toot = functions.make_toot()
	if not args.simulate:
		try:
			if toot['media'] != None:

@@ -35,10 +33,7 @@ if __name__ == '__main__':
				client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
		except Exception as err:
			toot = {
-				"toot":
-					"Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" \
-					+ " wrong! While attempting to post a toot, I received the following" \
-					+ " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
+				"toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw'])
			}
			client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
	print(toot['toot'])

main.py: 143 changed lines

@@ -7,26 +7,43 @@
from mastodon import Mastodon
from os import path
from bs4 import BeautifulSoup
-import os, sqlite3, signal, sys, json, re
+import os, sqlite3, signal, sys, json, re, shutil
import requests
+import functions

scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"]
-cfg = json.load(open('config.json', 'r'))
+try:
+	cfg = json.load(open('config.json', 'r'))
+except:
+	shutil.copy2("config.sample.json", "config.json")
+	cfg = json.load(open('config.json', 'r'))

+#config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults.
if 'site' not in cfg:
	cfg['website'] = "https://botsin.space"
if 'cw' not in cfg:
	cfg['cw'] = None
+if 'instance_blacklist' not in cfg:
+	cfg["instance_blacklist"] = [
+		"bofa.lol",
+		"witches.town"
+	]

+#if the user is using a (very!) old version that still uses the .secret files, migrate to the new method
if os.path.exists("clientcred.secret"):
	print("Upgrading to new storage method")
	cc = open("clientcred.secret").read().split("\n")
	cfg['client'] = {
		"id": cc[0],
		"secret": cc[1]
	}
	cfg['secret'] = open("usercred.secret").read().rstrip("\n")
	os.remove("clientcred.secret")
	os.remove("usercred.secret")

if "client" not in cfg:
-	print("No client credentials, registering application")
+	print("No application info -- registering application with {}".format(cfg['site']))
	client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
		api_base_url=cfg['site'],
		scopes=scopes,

@@ -38,47 +55,18 @@ if "client" not in cfg:
	}

if "secret" not in cfg:
-	print("No user credentials, logging in")
+	print("No user credentials -- logging in to {}".format(cfg['site']))
	client = Mastodon(client_id = cfg['client']['id'],
		client_secret = cfg['client']['secret'],
		api_base_url=cfg['site'])

-	print("Open this URL: {}".format(client.auth_request_url(scopes=scopes)))
+	print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes)))
	cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes)

json.dump(cfg, open("config.json", "w+"))

def extract_toot(toot):
-	toot = toot.replace("&apos;", "'")
-	toot = toot.replace("&quot;", '"')
-	soup = BeautifulSoup(toot, "html.parser")
-
-	# this is the code that removes all mentions
-	for mention in soup.select("span.h-card"):
-		mention.a.unwrap()
-		mention.span.unwrap()
-
-	# replace <br> with linebreak
-	for lb in soup.select("br"):
-		lb.insert_after("\n")
-		lb.decompose()
-
-	# replace <p> with linebreak
-	for p in soup.select("p"):
-		p.insert_after("\n")
-		p.unwrap()
-
-	# fix hashtags
-	for ht in soup.select("a.hashtag"):
-		ht.unwrap()
-
-	# fix links
-	for link in soup.select("a"):
-		link.insert_after(link["href"])
-		link.decompose()
-
-	toot = soup.get_text()
-	toot = toot.rstrip("\n") #remove trailing newline
+	toot = functions.extract_toot(toot)
	toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning
	return(toot)

@@ -104,25 +92,12 @@ def handleCtrlC(signal, frame):

signal.signal(signal.SIGINT, handleCtrlC)

-def get_toots_legacy(client, id):
-	i = 0
-	toots = client.account_statuses(id)
-	while toots is not None and len(toots) > 0:
-		for toot in toots:
-			if toot.spoiler_text != "": continue
-			if toot.reblog is not None: continue
-			if toot.visibility not in ["public", "unlisted"]: continue
-			t = extract_toot(toot.content)
-			if t != None:
-				yield {
-					"toot": t,
-					"id": toot.id,
-					"uri": toot.uri
-				}
-		toots = client.fetch_next(toots)
-		i += 1
-		if i%20 == 0:
-			print('.', end='', flush=True)
+patterns = {
+	"handle": re.compile(r"^.*@(.+)"),
+	"url": re.compile(r"https?:\/\/(.*)"),
+	"uri": re.compile(r'template="([^"]+)"'),
+	"pid": re.compile(r"[^\/]+$"),
+}

for f in following:
	last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()

@@ -133,28 +108,27 @@ for f in following:
	print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))

	#find the user's activitypub outbox
-	print("WebFingering...")
-	instance = re.search(r"^.*@(.+)", f.acct)
+	print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)")
+	instance = patterns["handle"].search(f.acct)
	if instance == None:
-		instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
+		instance = patterns["url"].search(cfg['site']).group(1)
	else:
		instance = instance.group(1)

-	if instance == "bofa.lol":
-		print("rest in piece bofa, skipping")
+	if instance in cfg['instance_blacklist']:
+		print("skipping blacklisted instance: {}".format(instance))
		continue

	# print("{} is on {}".format(f.acct, instance))

	try:
		r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
-		uri = re.search(r'template="([^"]+)"', r.text).group(1)
+		uri = patterns["uri"].search(r.text).group(1)
		uri = uri.format(uri = "{}@{}".format(f.username, instance))
		r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
		j = r.json()
-		if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it
-			uri = j['aliases'][0]
-		else:
-			uri = j['aliases'][1]
+		for link in j['links']:
+			if link['rel'] == 'self':
+				#this is a link formatted like "https://instan.ce/users/username", which is what we need
+				uri = link['href']
		uri = "{}/outbox?page=true".format(uri)
		r = requests.get(uri, timeout=10)
		j = r.json()

@@ -168,23 +142,23 @@ for f in following:
		pleroma = True
		j = j['first']
	else:
-		print("Mastodon instance detected")
+		print("Mastodon/Misskey instance detected")
		uri = "{}&min_id={}".format(uri, last_toot)
		r = requests.get(uri)
		j = r.json()

-	print("Downloading and parsing toots", end='', flush=True)
+	print("Downloading and saving toots", end='', flush=True)
	done = False
	try:
		while not done and len(j['orderedItems']) > 0:
			for oi in j['orderedItems']:
				if oi['type'] != "Create":
-					continue #not a toost. fuck outta here
+					continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore

				# its a toost baby
				content = oi['object']['content']
				if oi['object']['summary'] != None and oi['object']['summary'] != "":
-					#don't download CW'd toots
+					#don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this)
					continue
				toot = extract_toot(content)
				# print(toot)

@@ -192,11 +166,12 @@ for f in following:
					if pleroma:
						if c.execute("SELECT COUNT(*) FROM toots WHERE uri LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
-							#we've caught up to the notices we've already downloaded, so we can stop now
+							#you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought"
							done = True
							break
-					pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
-					c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
-						(pid,
+					pid = patterns["pid"].search(oi['object']['id']).group(0)
+					c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", (
+						pid,
						f.id,
						oi['object']['id'],
						toot

@@ -205,7 +180,6 @@ for f in following:
					pass
				except:
					pass #ignore any toots that don't successfully go into the DB
			# sys.exit(0)
			if not pleroma:
				r = requests.get(j['prev'], timeout=15)
			else:

@@ -215,9 +189,8 @@ for f in following:
		print(" Done!")
		db.commit()
	except:
-		print("Encountered an error! Saving toots to database and continuing.")
+		print("Encountered an error! Saving toots to database and moving to next followed account.")
		db.commit()
	# db.close()

print("Done!")
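
For readers unfamiliar with the harvesting flow that main.py implements, here is a condensed sketch (not code from the repo) of the WebFinger-based outbox discovery shown in the diff: host-meta yields the WebFinger URL template, WebFinger yields the account's links, and the rel="self" link points at the ActivityPub actor whose /outbox is then paged through:

import re
import requests

def find_outbox(username, instance):
	# 1. host-meta: get the instance's WebFinger URL template
	r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
	template = re.search(r'template="([^"]+)"', r.text).group(1)

	# 2. WebFinger: look up the account and find its ActivityPub "self" link
	r = requests.get(template.format(uri="{}@{}".format(username, instance)),
		headers={"Accept": "application/json"}, timeout=10)
	actor = None
	for link in r.json()["links"]:
		if link["rel"] == "self":
			actor = link["href"]  # e.g. https://instan.ce/users/username

	# 3. the actor's outbox, fetched page by page
	return "{}/outbox?page=true".format(actor)

# print(find_outbox("someone", "example.social"))  # hypothetical account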

reply.py: 41 changed lines

@@ -5,7 +5,7 @@

import mastodon
import os, random, re, json
-import create
+import functions
from bs4 import BeautifulSoup

cfg = json.load(open('config.json', 'r'))

@@ -17,40 +17,25 @@ client = mastodon.Mastodon(
	api_base_url=cfg['site'])

def extract_toot(toot):
	#copied from main.py, see there for comments
-	soup = BeautifulSoup(toot, "html.parser")
-	for lb in soup.select("br"):
-		lb.insert_after("\n")
-		lb.decompose()
-	for p in soup.select("p"):
-		p.insert_after("\n")
-		p.unwrap()
-	for ht in soup.select("a.hashtag"):
-		ht.unwrap()
-	for link in soup.select("a"):
-		link.insert_after(link["href"])
-		link.decompose()
-	text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
-	text = "\n".join(list(text))
-	text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in
-	text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one
-	text = text.lower() #for easier matching
+	text = functions.extract_toot(toot)
+	text = re.sub(r"^@[^@]+@[^ ]+\s*", r"", text) #remove the initial mention
+	text = text.lower() #treat text as lowercase for easier keyword matching (if this bot uses it)
	return text

class ReplyListener(mastodon.StreamListener):
-	def on_notification(self, notification):
-		if notification['type'] == 'mention':
-			acct = "@" + notification['account']['acct']
+	def on_notification(self, notification): #listen for notifications
+		if notification['type'] == 'mention': #if we're mentioned:
+			acct = "@" + notification['account']['acct'] #get the account's @
			post_id = notification['status']['id']
			mention = extract_toot(notification['status']['content'])
-			toot = create.make_toot(True)['toot']
-			toot = acct + " " + toot
-			print(acct + " says " + mention)
+			toot = functions.make_toot(True)['toot'] #generate a toot
+			toot = acct + " " + toot #prepend the @
+			print(acct + " says " + mention) #logging
			visibility = notification['status']['visibility']
			if visibility == "public":
				visibility = "unlisted"
-			client.status_post(toot, post_id, visibility=visibility, spoiler_text = cfg['cw'])
-			print("replied with " + toot)
+			client.status_post(toot, post_id, visibility=visibility, spoiler_text = cfg['cw']) #send toost
+			print("replied with " + toot) #logging

rl = ReplyListener()
-client.stream_user(rl)
+client.stream_user(rl) #go!