Compare commits

..

No commits in common. "cec2c83cb4edcebf5613887506eee88f09012197" and "fee99be4a14e2dd801f9b09f6048830c454829f1" have entirely different histories.

2 changed files with 10 additions and 155 deletions

50
gpt.py
View File

@ -209,31 +209,7 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
# Collect first to a list so we can sort/group # Collect first to a list so we can sort/group
records = [] records = []
items = uinfo.Items items = uinfo.Items
# Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
# Incremental restrict by last seen sent time (with 2-day backoff), and schema upgrade for entry_id
try:
from localcache2 import db as _db, upgrade_usefulinfo_schema
upgrade_usefulinfo_schema()
CON, CUR = _db()
last_iso = None
try:
CUR.execute("SELECT MAX(sent_iso) FROM useful_info_email")
row = CUR.fetchone()
if row and row[0]:
last_iso = row[0]
finally:
CUR.close(); CON.close()
if last_iso:
from dateutil import parser as _p
from datetime import timedelta
dt = _p.parse(str(last_iso)) - timedelta(days=2)
start_str = dt.strftime("%m/%d/%Y %I:%M %p")
items = items.Restrict(f"[ReceivedTime] >= '{start_str}'")
except Exception as ex_restrict:
# If anything fails, fall back to full set
pass
# Sort by SentOn ascending inside Outlook (helps performance for big folders)
try: try:
items.Sort("[SentOn]", True) # True => ascending items.Sort("[SentOn]", True) # True => ascending
except Exception: except Exception:
@ -259,11 +235,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
sent_on = getattr(message, "SentOn", None) sent_on = getattr(message, "SentOn", None)
sent_iso = iso(sent_on) sent_iso = iso(sent_on)
entry_id = None
try:
entry_id = getattr(message, 'EntryID', None)
except Exception:
entry_id = None
attachments_saved = [] attachments_saved = []
if save_attachments: if save_attachments:
@ -281,29 +252,12 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
except Exception: except Exception:
body = "" body = ""
# Decode Outlook SafeLinks inside body text (basic replacement)
try:
import urllib.parse as _up
def _decode_match(m):
u = m.group(0)
try:
q = _up.urlparse(u).query
params = _up.parse_qs(q)
real = params.get('url', [''])[0] or params.get('target', [''])[0]
return _up.unquote(real) if real else u
except Exception:
return u
body = re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, body)
except Exception:
pass
records.append({ records.append({
"subject_norm": subj_norm, "subject_norm": subj_norm,
"subject_raw": subj_raw, "subject_raw": subj_raw,
"sender": sender, "sender": sender,
"sent_on": sent_on, "sent_on": sent_on,
"sent_iso": sent_iso, "sent_iso": sent_iso,
"entry_id": entry_id,
"attachments": attachments_saved, "attachments": attachments_saved,
"body": body, "body": body,
}) })
@ -411,7 +365,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
body=rec['body'], body=rec['body'],
attachments=atts, attachments=atts,
summary_id=None, summary_id=None,
entry_id=rec.get('entry_id')
) )
except Exception as e: except Exception as e:
print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e)) print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))
@ -831,7 +784,6 @@ if __name__ == "__main__":
5: ['list faq mailbox', list_faq], 5: ['list faq mailbox', list_faq],
6: ['process useful info msgs', process_useful_info], 6: ['process useful info msgs', process_useful_info],
7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)], 7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)],
8: ['fix safelinks in DB', lambda: (__import__('localcache2').localcache2.fix_safelinks_in_db() or True)],
} }

View File

@ -640,7 +640,6 @@ def init_usefulinfo_schema():
CREATE TABLE IF NOT EXISTS useful_info_email ( CREATE TABLE IF NOT EXISTS useful_info_email (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL, summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL,
entry_id TEXT UNIQUE,
subject_raw TEXT, subject_raw TEXT,
subject_norm TEXT, subject_norm TEXT,
sender TEXT, sender TEXT,
@ -923,74 +922,7 @@ def insert_usefulinfo_record(parsed):
return summary_id return summary_id
def _decode_safelinks_text(text): def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None):
"""Replace Outlook SafeLinks with their original URL inside given text.
Returns modified text (or original if no changes).
"""
if not text:
return text
try:
import urllib.parse as _up
import re as _re
def _decode_match(m):
u = m.group(0)
try:
q = _up.urlparse(u).query
params = _up.parse_qs(q)
real = params.get('url', [''])[0] or params.get('target', [''])[0]
return _up.unquote(real) if real else u
except Exception:
return u
return _re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, text)
except Exception:
return text
def fix_safelinks_in_db(batch_size=500):
    """Decode SafeLinks URLs in existing DB rows.

    Rewrites, in this order:
      - useful_info_email.body
      - useful_info_summary.summary_text
      - useful_info_attachment.text (rows where text IS NOT NULL)

    Rows are read in pages of *batch_size* ordered by id, and each page is
    committed before the next one is read, so memory use and transaction
    size stay bounded on large tables.

    Args:
        batch_size: Number of rows fetched per page. Values that are not a
            positive integer fall back to 500.

    Returns:
        dict with the number of rows updated per column, keyed by
        'email_body', 'summary_text' and 'attachment_text'.
    """
    try:
        limit = int(batch_size)
    except (TypeError, ValueError):
        limit = 500
    if limit <= 0:
        limit = 500

    # (result key, table, text column, row filter). These identifiers are
    # fixed constants, never user input, so formatting them into the SQL
    # below is safe; all values still go through %s placeholders.
    targets = (
        ('email_body', 'useful_info_email', 'body', ''),
        ('summary_text', 'useful_info_summary', 'summary_text', ''),
        ('attachment_text', 'useful_info_attachment', 'text', 'WHERE text IS NOT NULL'),
    )

    CON, CUR = db()
    updated = {'email_body': 0, 'summary_text': 0, 'attachment_text': 0}
    try:
        for key, table, column, where in targets:
            offset = 0
            while True:
                # The UPDATEs below never change ids or the set of rows that
                # match the filter, so OFFSET paging over ORDER BY id is
                # stable between pages.
                CUR.execute(
                    f"SELECT id, {column} FROM {table} {where} ORDER BY id LIMIT %s OFFSET %s",
                    (limit, offset),
                )
                rows = CUR.fetchall()
                if not rows:
                    break
                for rid, old in rows:
                    new = _decode_safelinks_text(old)
                    if new != old:
                        CUR.execute(f"UPDATE {table} SET {column}=%s WHERE id=%s", (new, rid))
                        updated[key] += 1
                CON.commit()
                if len(rows) < limit:
                    break
                offset += len(rows)
    finally:
        CUR.close()
        CON.close()
    print('[usefulinfo][fix_safelinks] updated:', updated)
    return updated
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None, entry_id=None):
"""Insert an original email and any attachments. """Insert an original email and any attachments.
attachments: list of dicts like {'path': str, 'text': str or None} attachments: list of dicts like {'path': str, 'text': str or None}
summary_id: optional FK to useful_info_summary; can be None and linked later. summary_id: optional FK to useful_info_summary; can be None and linked later.
@ -1000,26 +932,14 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
CON, CUR = db() CON, CUR = db()
email_id = None email_id = None
try: try:
try: CUR.execute(
CUR.execute( """
""" INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
INSERT INTO useful_info_email (summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body) VALUES (%s, %s, %s, %s, %s, %s)
VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id
ON CONFLICT (entry_id) DO NOTHING """,
RETURNING id (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
""", )
(summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
)
except Exception:
# Fallback if entry_id column not present
CUR.execute(
"""
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
VALUES (%s, %s, %s, %s, %s, %s)
RETURNING id
""",
(summary_id, subject_raw, subject_norm, sender, sent_iso, body)
)
row = CUR.fetchone() row = CUR.fetchone()
email_id = row[0] if row else None email_id = row[0] if row else None
@ -1060,23 +980,6 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
return email_id return email_id
def upgrade_usefulinfo_schema():
    """Ensure incremental-friendly schema (entry_id unique) is present.

    Adds the ``entry_id`` column to ``useful_info_email`` and a unique index
    on it. Both statements use IF NOT EXISTS, so calling this repeatedly is
    harmless. The upgrade is best effort: a failing statement is reported
    and skipped rather than raised.
    """
    statements = (
        "ALTER TABLE useful_info_email ADD COLUMN IF NOT EXISTS entry_id TEXT",
        "CREATE UNIQUE INDEX IF NOT EXISTS useful_info_email_entry_id_idx ON useful_info_email(entry_id)",
    )
    CON, CUR = db()
    try:
        for sql in statements:
            try:
                CUR.execute(sql)
                # Commit per statement so a later failure cannot undo an
                # earlier success.
                CON.commit()
            except Exception as e:
                # On PostgreSQL a failed statement leaves the transaction
                # aborted; without a rollback every following statement on
                # this connection would fail as well.
                CON.rollback()
                print('[usefulinfo][schema-upgrade-failed]', sql, str(e))
    finally:
        CUR.close()
        CON.close()
def link_emails_to_summary(subject_norm, summary_id): def link_emails_to_summary(subject_norm, summary_id):
"""Link any emails with the given normalized subject to the provided summary. """Link any emails with the given normalized subject to the provided summary.
Returns number of rows updated. Returns number of rows updated.