Compare commits

..

No commits in common. "cec2c83cb4edcebf5613887506eee88f09012197" and "fee99be4a14e2dd801f9b09f6048830c454829f1" have entirely different histories.

2 changed files with 10 additions and 155 deletions

50
gpt.py
View File

@ -209,31 +209,7 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
# Collect first to a list so we can sort/group
records = []
items = uinfo.Items
# Incremental restrict by last seen sent time (with 2-day backoff), and schema upgrade for entry_id
try:
from localcache2 import db as _db, upgrade_usefulinfo_schema
upgrade_usefulinfo_schema()
CON, CUR = _db()
last_iso = None
try:
CUR.execute("SELECT MAX(sent_iso) FROM useful_info_email")
row = CUR.fetchone()
if row and row[0]:
last_iso = row[0]
finally:
CUR.close(); CON.close()
if last_iso:
from dateutil import parser as _p
from datetime import timedelta
dt = _p.parse(str(last_iso)) - timedelta(days=2)
start_str = dt.strftime("%m/%d/%Y %I:%M %p")
items = items.Restrict(f"[ReceivedTime] >= '{start_str}'")
except Exception as ex_restrict:
# If anything fails, fall back to full set
pass
# Sort by SentOn ascending inside Outlook (helps performance for big folders)
# Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
try:
items.Sort("[SentOn]", True) # True => ascending
except Exception:
@ -259,11 +235,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
sent_on = getattr(message, "SentOn", None)
sent_iso = iso(sent_on)
entry_id = None
try:
entry_id = getattr(message, 'EntryID', None)
except Exception:
entry_id = None
attachments_saved = []
if save_attachments:
@ -281,29 +252,12 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
except Exception:
body = ""
# Decode Outlook SafeLinks inside body text (basic replacement)
try:
import urllib.parse as _up
def _decode_match(m):
u = m.group(0)
try:
q = _up.urlparse(u).query
params = _up.parse_qs(q)
real = params.get('url', [''])[0] or params.get('target', [''])[0]
return _up.unquote(real) if real else u
except Exception:
return u
body = re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, body)
except Exception:
pass
records.append({
"subject_norm": subj_norm,
"subject_raw": subj_raw,
"sender": sender,
"sent_on": sent_on,
"sent_iso": sent_iso,
"entry_id": entry_id,
"attachments": attachments_saved,
"body": body,
})
@ -411,7 +365,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
body=rec['body'],
attachments=atts,
summary_id=None,
entry_id=rec.get('entry_id')
)
except Exception as e:
print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))
@ -831,7 +784,6 @@ if __name__ == "__main__":
5: ['list faq mailbox', list_faq],
6: ['process useful info msgs', process_useful_info],
7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)],
8: ['fix safelinks in DB', lambda: (__import__('localcache2').localcache2.fix_safelinks_in_db() or True)],
}

View File

@ -640,7 +640,6 @@ def init_usefulinfo_schema():
CREATE TABLE IF NOT EXISTS useful_info_email (
id BIGSERIAL PRIMARY KEY,
summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL,
entry_id TEXT UNIQUE,
subject_raw TEXT,
subject_norm TEXT,
sender TEXT,
@ -923,74 +922,7 @@ def insert_usefulinfo_record(parsed):
return summary_id
def _decode_safelinks_text(text):
"""Replace Outlook SafeLinks with their original URL inside given text.
Returns modified text (or original if no changes).
"""
if not text:
return text
try:
import urllib.parse as _up
import re as _re
def _decode_match(m):
u = m.group(0)
try:
q = _up.urlparse(u).query
params = _up.parse_qs(q)
real = params.get('url', [''])[0] or params.get('target', [''])[0]
return _up.unquote(real) if real else u
except Exception:
return u
return _re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, text)
except Exception:
return text
def fix_safelinks_in_db(batch_size=500):
    """Decode SafeLinks URLs in existing DB rows.

    Rewrites these text columns in place:
      - useful_info_email.body
      - useful_info_summary.summary_text
      - useful_info_attachment.text

    Rows are read in id order, ``batch_size`` at a time, and each batch is
    committed before the next one is fetched, so a whole table is never held
    in memory at once.

    Args:
        batch_size: Maximum number of rows fetched (and committed) per round
            trip. Values below 1 are treated as 1; values that cannot be
            converted to an int fall back to 500.

    Returns:
        A dict of counts updated, keyed by ``'email_body'``,
        ``'summary_text'`` and ``'attachment_text'``.
    """
    # (counter key, table, text column). These are fixed internal
    # identifiers, so interpolating them into the SQL below is safe.
    targets = (
        ('email_body', 'useful_info_email', 'body'),
        ('summary_text', 'useful_info_summary', 'summary_text'),
        ('attachment_text', 'useful_info_attachment', 'text'),
    )
    try:
        batch_size = max(1, int(batch_size))
    except (TypeError, ValueError):
        batch_size = 500

    updated = {key: 0 for key, _table, _column in targets}
    CON, CUR = db()
    try:
        for key, table, column in targets:
            last_id = None
            while True:
                # Keyset pagination: resume after the last id already seen.
                # NULL text has nothing to decode, so it is filtered out.
                if last_id is None:
                    CUR.execute(
                        f"SELECT id, {column} FROM {table} "
                        f"WHERE {column} IS NOT NULL ORDER BY id LIMIT %s",
                        (batch_size,),
                    )
                else:
                    CUR.execute(
                        f"SELECT id, {column} FROM {table} "
                        f"WHERE {column} IS NOT NULL AND id > %s ORDER BY id LIMIT %s",
                        (last_id, batch_size),
                    )
                # Materialize the batch first: the UPDATEs below reuse CUR.
                rows = CUR.fetchall()
                if not rows:
                    break
                for rid, old in rows:
                    new = _decode_safelinks_text(old)
                    if new != old:
                        CUR.execute(f"UPDATE {table} SET {column}=%s WHERE id=%s", (new, rid))
                        updated[key] += 1
                CON.commit()
                last_id = rows[-1][0]
    finally:
        CUR.close()
        CON.close()
    print('[usefulinfo][fix_safelinks] updated:', updated)
    return updated
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None, entry_id=None):
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None):
"""Insert an original email and any attachments.
attachments: list of dicts like {'path': str, 'text': str or None}
summary_id: optional FK to useful_info_summary; can be None and linked later.
@ -1000,26 +932,14 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
CON, CUR = db()
email_id = None
try:
try:
CUR.execute(
"""
INSERT INTO useful_info_email (summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
VALUES (%s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (entry_id) DO NOTHING
RETURNING id
""",
(summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
)
except Exception:
# Fallback if entry_id column not present
CUR.execute(
"""
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
VALUES (%s, %s, %s, %s, %s, %s)
RETURNING id
""",
(summary_id, subject_raw, subject_norm, sender, sent_iso, body)
)
CUR.execute(
"""
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
VALUES (%s, %s, %s, %s, %s, %s)
RETURNING id
""",
(summary_id, subject_raw, subject_norm, sender, sent_iso, body)
)
row = CUR.fetchone()
email_id = row[0] if row else None
@ -1060,23 +980,6 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
return email_id
def upgrade_usefulinfo_schema():
    """Ensure incremental-friendly schema (entry_id unique) is present.

    Adds the ``entry_id`` column to ``useful_info_email`` and a unique index
    on it, each only if missing. The upgrade is best-effort: a failing
    statement is reported and rolled back, then the remaining statements are
    still attempted. No exception from the statements themselves is
    propagated to the caller.
    """
    statements = (
        "ALTER TABLE useful_info_email ADD COLUMN IF NOT EXISTS entry_id TEXT",
        "CREATE UNIQUE INDEX IF NOT EXISTS useful_info_email_entry_id_idx ON useful_info_email(entry_id)",
    )
    CON, CUR = db()
    try:
        for sql in statements:
            try:
                CUR.execute(sql)
                # Commit each step so a later failure cannot undo it.
                CON.commit()
            except Exception as ex:
                print('[usefulinfo][schema-upgrade-failed]', sql, str(ex))
                # A failed statement leaves the transaction aborted; roll
                # back so the next statement is not rejected as well.
                try:
                    CON.rollback()
                except Exception:
                    pass
    finally:
        CUR.close()
        CON.close()
def link_emails_to_summary(subject_norm, summary_id):
"""Link any emails with the given normalized subject to the provided summary.
Returns number of rows updated.