fix for pleroma

This commit is contained in:
Lynne 2018-10-27 18:28:20 +10:00
parent a1324acfba
commit eeba1c9066

116
main.py
View File

@ -14,15 +14,15 @@ scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]
cfg = json.load(open('config.json', 'r')) cfg = json.load(open('config.json', 'r'))
if os.path.exists("clientcred.secret"): if os.path.exists("clientcred.secret"):
print("Upgrading to new storage method") print("Upgrading to new storage method")
cc = open("clientcred.secret").read().split("\n") cc = open("clientcred.secret").read().split("\n")
cfg['client'] = { cfg['client'] = {
"id": cc[0], "id": cc[0],
"secret": cc[1] "secret": cc[1]
} }
cfg['secret'] = open("usercred.secret").read().rstrip("\n") cfg['secret'] = open("usercred.secret").read().rstrip("\n")
os.remove("clientcred.secret") os.remove("clientcred.secret")
os.remove("usercred.secret") os.remove("usercred.secret")
if "client" not in cfg: if "client" not in cfg:
@ -105,6 +105,26 @@ def handleCtrlC(signal, frame):
signal.signal(signal.SIGINT, handleCtrlC) signal.signal(signal.SIGINT, handleCtrlC)
def get_toots_legacy(client, id, *, progress_every=20):
    """Yield the learnable toots of one account using the client API.

    This is the fallback harvesting path used when the ActivityPub outbox
    of an instance cannot be walked the usual way (the caller switches to
    it for instances it detects as Pleroma).

    Pages are requested with ``client.account_statuses(id)`` and then
    ``client.fetch_next(page)`` until an empty or ``None`` page comes back.
    NOTE(review): presumably ``client`` is a Mastodon.py client -- confirm.

    A toot is skipped when any of the following holds:

    * it carries a content warning (non-empty ``spoiler_text``; a missing
      ``None`` value is treated as "no content warning", matching how the
      outbox branch treats a ``None`` summary),
    * it is a boost (``reblog`` is not ``None``),
    * its visibility is neither ``"public"`` nor ``"unlisted"``,
    * ``extract_toot`` returns ``None`` for its content.

    Args:
        client: Object exposing ``account_statuses`` and ``fetch_next``.
        id: Account id whose statuses are fetched. The name shadows the
            builtin but is kept so existing callers keep working.
        progress_every: Print one ``.`` to stdout after every this many
            pages. ``0`` or ``None`` disables the progress output.

    Yields:
        dict: ``{"toot": <extracted text>, "id": <toot id>, "uri": <toot uri>}``.
    """
    # Built once instead of once per toot.
    allowed_visibility = ("public", "unlisted")

    page_count = 0
    toots = client.account_statuses(id)
    while toots:
        for toot in toots:
            if toot.spoiler_text:
                # Content-warned toots are never learned from.
                continue
            if toot.reblog is not None:
                # Boosts are someone else's words.
                continue
            if toot.visibility not in allowed_visibility:
                continue

            text = extract_toot(toot.content)
            if text is not None:
                yield {
                    "toot": text,
                    "id": toot.id,
                    "uri": toot.uri,
                }

        toots = client.fetch_next(toots)
        page_count += 1
        if progress_every and page_count % progress_every == 0:
            print('.', end='', flush=True)
for f in following: for f in following:
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
if last_toot != None: if last_toot != None:
@ -114,7 +134,7 @@ for f in following:
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
#find the user's activitypub outbox #find the user's activitypub outbox
#print("WebFingering...") print("WebFingering...")
instance = re.search(r"^.*@(.+)", f.acct) instance = re.search(r"^.*@(.+)", f.acct)
if instance == None: if instance == None:
instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
@ -130,8 +150,12 @@ for f in following:
r = requests.get("https://{}/.well-known/host-meta".format(instance)) r = requests.get("https://{}/.well-known/host-meta".format(instance))
uri = re.search(r'template="([^"]+)"', r.text).group(1) uri = re.search(r'template="([^"]+)"', r.text).group(1)
uri = uri.format(uri = "{}@{}".format(f.username, instance)) uri = uri.format(uri = "{}@{}".format(f.username, instance))
r = requests.get(uri) r = requests.get(uri, headers={"Accept": "application/json"})
uri = r.json()['aliases'][1] #TODO: find out if it's safe to rely on this j = r.json()
if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it
uri = j['aliases'][0]
else:
uri = j['aliases'][1]
uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot) uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot)
r = requests.get(uri) r = requests.get(uri)
j = r.json() j = r.json()
@ -139,34 +163,54 @@ for f in following:
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
sys.exit(1) sys.exit(1)
pleroma = False
if 'first' in j:
print("{} is a pleroma instance -- falling back to legacy toot collection method".format(instance))
pleroma = True
print("Downloading and parsing toots", end='', flush=True) print("Downloading and parsing toots", end='', flush=True)
current = None current = None
try: try:
while len(j['orderedItems']) > 0: if pleroma:
for oi in j['orderedItems']: for t in get_toots_legacy(client, f.id):
if oi['type'] == "Create": try:
# its a toost baby c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
content = oi['object']['content'] (t['id'],
if oi['object']['summary'] != None: f.id,
#don't download CW'd toots t['uri'],
continue t['toot']
toot = extract_toot(content)
# print(toot)
try:
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
(re.search(r"[^\/]+$", oi['object']['id']).group(0),
f.id,
oi['object']['id'],
toot
)
) )
pass )
except: except:
pass #ignore any toots that don't go into the DB pass
# sys.exit(0)
r = requests.get(j['prev']) else:
j = r.json() while len(j['orderedItems']) > 0:
print('.', end='', flush=True) for oi in j['orderedItems']:
if (not pleroma and oi['type'] == "Create") or (pleroma and oi['to']['type'] == "Create"):
# its a toost baby
content = oi['object']['content']
if oi['object']['summary'] != None:
#don't download CW'd toots
continue
toot = extract_toot(content)
# print(toot)
try:
pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
(pid,
f.id,
oi['object']['id'],
toot
)
)
pass
except:
pass #ignore any toots that don't go into the DB
# sys.exit(0)
r = requests.get(j['prev'])
j = r.json()
print('.', end='', flush=True)
print(" Done!") print(" Done!")
db.commit() db.commit()
except: except: