Compare commits
No commits in common. "cec2c83cb4edcebf5613887506eee88f09012197" and "fee99be4a14e2dd801f9b09f6048830c454829f1" have entirely different histories.
cec2c83cb4
...
fee99be4a1
50
gpt.py
50
gpt.py
|
|
@ -209,31 +209,7 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
||||||
# Collect first to a list so we can sort/group
|
# Collect first to a list so we can sort/group
|
||||||
records = []
|
records = []
|
||||||
items = uinfo.Items
|
items = uinfo.Items
|
||||||
|
# Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
|
||||||
# Incremental restrict by last seen sent time (with 2-day backoff), and schema upgrade for entry_id
|
|
||||||
try:
|
|
||||||
from localcache2 import db as _db, upgrade_usefulinfo_schema
|
|
||||||
upgrade_usefulinfo_schema()
|
|
||||||
CON, CUR = _db()
|
|
||||||
last_iso = None
|
|
||||||
try:
|
|
||||||
CUR.execute("SELECT MAX(sent_iso) FROM useful_info_email")
|
|
||||||
row = CUR.fetchone()
|
|
||||||
if row and row[0]:
|
|
||||||
last_iso = row[0]
|
|
||||||
finally:
|
|
||||||
CUR.close(); CON.close()
|
|
||||||
if last_iso:
|
|
||||||
from dateutil import parser as _p
|
|
||||||
from datetime import timedelta
|
|
||||||
dt = _p.parse(str(last_iso)) - timedelta(days=2)
|
|
||||||
start_str = dt.strftime("%m/%d/%Y %I:%M %p")
|
|
||||||
items = items.Restrict(f"[ReceivedTime] >= '{start_str}'")
|
|
||||||
except Exception as ex_restrict:
|
|
||||||
# If anything fails, fall back to full set
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Sort by SentOn ascending inside Outlook (helps performance for big folders)
|
|
||||||
try:
|
try:
|
||||||
items.Sort("[SentOn]", True) # True => ascending
|
items.Sort("[SentOn]", True) # True => ascending
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -259,11 +235,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
||||||
|
|
||||||
sent_on = getattr(message, "SentOn", None)
|
sent_on = getattr(message, "SentOn", None)
|
||||||
sent_iso = iso(sent_on)
|
sent_iso = iso(sent_on)
|
||||||
entry_id = None
|
|
||||||
try:
|
|
||||||
entry_id = getattr(message, 'EntryID', None)
|
|
||||||
except Exception:
|
|
||||||
entry_id = None
|
|
||||||
|
|
||||||
attachments_saved = []
|
attachments_saved = []
|
||||||
if save_attachments:
|
if save_attachments:
|
||||||
|
|
@ -281,29 +252,12 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
||||||
except Exception:
|
except Exception:
|
||||||
body = ""
|
body = ""
|
||||||
|
|
||||||
# Decode Outlook SafeLinks inside body text (basic replacement)
|
|
||||||
try:
|
|
||||||
import urllib.parse as _up
|
|
||||||
def _decode_match(m):
|
|
||||||
u = m.group(0)
|
|
||||||
try:
|
|
||||||
q = _up.urlparse(u).query
|
|
||||||
params = _up.parse_qs(q)
|
|
||||||
real = params.get('url', [''])[0] or params.get('target', [''])[0]
|
|
||||||
return _up.unquote(real) if real else u
|
|
||||||
except Exception:
|
|
||||||
return u
|
|
||||||
body = re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, body)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
records.append({
|
records.append({
|
||||||
"subject_norm": subj_norm,
|
"subject_norm": subj_norm,
|
||||||
"subject_raw": subj_raw,
|
"subject_raw": subj_raw,
|
||||||
"sender": sender,
|
"sender": sender,
|
||||||
"sent_on": sent_on,
|
"sent_on": sent_on,
|
||||||
"sent_iso": sent_iso,
|
"sent_iso": sent_iso,
|
||||||
"entry_id": entry_id,
|
|
||||||
"attachments": attachments_saved,
|
"attachments": attachments_saved,
|
||||||
"body": body,
|
"body": body,
|
||||||
})
|
})
|
||||||
|
|
@ -411,7 +365,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
||||||
body=rec['body'],
|
body=rec['body'],
|
||||||
attachments=atts,
|
attachments=atts,
|
||||||
summary_id=None,
|
summary_id=None,
|
||||||
entry_id=rec.get('entry_id')
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))
|
print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))
|
||||||
|
|
@ -831,7 +784,6 @@ if __name__ == "__main__":
|
||||||
5: ['list faq mailbox', list_faq],
|
5: ['list faq mailbox', list_faq],
|
||||||
6: ['process useful info msgs', process_useful_info],
|
6: ['process useful info msgs', process_useful_info],
|
||||||
7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)],
|
7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)],
|
||||||
8: ['fix safelinks in DB', lambda: (__import__('localcache2').localcache2.fix_safelinks_in_db() or True)],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
115
localcache2.py
115
localcache2.py
|
|
@ -640,7 +640,6 @@ def init_usefulinfo_schema():
|
||||||
CREATE TABLE IF NOT EXISTS useful_info_email (
|
CREATE TABLE IF NOT EXISTS useful_info_email (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL,
|
summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL,
|
||||||
entry_id TEXT UNIQUE,
|
|
||||||
subject_raw TEXT,
|
subject_raw TEXT,
|
||||||
subject_norm TEXT,
|
subject_norm TEXT,
|
||||||
sender TEXT,
|
sender TEXT,
|
||||||
|
|
@ -923,74 +922,7 @@ def insert_usefulinfo_record(parsed):
|
||||||
return summary_id
|
return summary_id
|
||||||
|
|
||||||
|
|
||||||
def _decode_safelinks_text(text):
|
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None):
|
||||||
"""Replace Outlook SafeLinks with their original URL inside given text.
|
|
||||||
Returns modified text (or original if no changes).
|
|
||||||
"""
|
|
||||||
if not text:
|
|
||||||
return text
|
|
||||||
try:
|
|
||||||
import urllib.parse as _up
|
|
||||||
import re as _re
|
|
||||||
def _decode_match(m):
|
|
||||||
u = m.group(0)
|
|
||||||
try:
|
|
||||||
q = _up.urlparse(u).query
|
|
||||||
params = _up.parse_qs(q)
|
|
||||||
real = params.get('url', [''])[0] or params.get('target', [''])[0]
|
|
||||||
return _up.unquote(real) if real else u
|
|
||||||
except Exception:
|
|
||||||
return u
|
|
||||||
return _re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, text)
|
|
||||||
except Exception:
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def fix_safelinks_in_db(batch_size=500):
|
|
||||||
"""Decode SafeLinks URLs in existing DB rows for:
|
|
||||||
- useful_info_email.body
|
|
||||||
- useful_info_summary.summary_text
|
|
||||||
- useful_info_attachment.text
|
|
||||||
Returns a dict of counts updated.
|
|
||||||
"""
|
|
||||||
CON, CUR = db()
|
|
||||||
updated = {'email_body': 0, 'summary_text': 0, 'attachment_text': 0}
|
|
||||||
try:
|
|
||||||
# Emails
|
|
||||||
CUR.execute("SELECT id, body FROM useful_info_email")
|
|
||||||
rows = CUR.fetchall()
|
|
||||||
for rid, body in rows:
|
|
||||||
new = _decode_safelinks_text(body)
|
|
||||||
if new != body:
|
|
||||||
CUR.execute("UPDATE useful_info_email SET body=%s WHERE id=%s", (new, rid))
|
|
||||||
updated['email_body'] += 1
|
|
||||||
CON.commit()
|
|
||||||
|
|
||||||
# Summaries
|
|
||||||
CUR.execute("SELECT id, summary_text FROM useful_info_summary")
|
|
||||||
rows = CUR.fetchall()
|
|
||||||
for rid, st in rows:
|
|
||||||
new = _decode_safelinks_text(st)
|
|
||||||
if new != st:
|
|
||||||
CUR.execute("UPDATE useful_info_summary SET summary_text=%s WHERE id=%s", (new, rid))
|
|
||||||
updated['summary_text'] += 1
|
|
||||||
CON.commit()
|
|
||||||
|
|
||||||
# Attachments text
|
|
||||||
CUR.execute("SELECT id, text FROM useful_info_attachment WHERE text IS NOT NULL")
|
|
||||||
rows = CUR.fetchall()
|
|
||||||
for rid, tx in rows:
|
|
||||||
new = _decode_safelinks_text(tx)
|
|
||||||
if new != tx:
|
|
||||||
CUR.execute("UPDATE useful_info_attachment SET text=%s WHERE id=%s", (new, rid))
|
|
||||||
updated['attachment_text'] += 1
|
|
||||||
CON.commit()
|
|
||||||
finally:
|
|
||||||
CUR.close(); CON.close()
|
|
||||||
print('[usefulinfo][fix_safelinks] updated:', updated)
|
|
||||||
return updated
|
|
||||||
|
|
||||||
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None, entry_id=None):
|
|
||||||
"""Insert an original email and any attachments.
|
"""Insert an original email and any attachments.
|
||||||
attachments: list of dicts like {'path': str, 'text': str or None}
|
attachments: list of dicts like {'path': str, 'text': str or None}
|
||||||
summary_id: optional FK to useful_info_summary; can be None and linked later.
|
summary_id: optional FK to useful_info_summary; can be None and linked later.
|
||||||
|
|
@ -1000,26 +932,14 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
|
||||||
CON, CUR = db()
|
CON, CUR = db()
|
||||||
email_id = None
|
email_id = None
|
||||||
try:
|
try:
|
||||||
try:
|
CUR.execute(
|
||||||
CUR.execute(
|
"""
|
||||||
"""
|
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
|
||||||
INSERT INTO useful_info_email (summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
|
VALUES (%s, %s, %s, %s, %s, %s)
|
||||||
VALUES (%s, %s, %s, %s, %s, %s, %s)
|
RETURNING id
|
||||||
ON CONFLICT (entry_id) DO NOTHING
|
""",
|
||||||
RETURNING id
|
(summary_id, subject_raw, subject_norm, sender, sent_iso, body)
|
||||||
""",
|
)
|
||||||
(summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
# Fallback if entry_id column not present
|
|
||||||
CUR.execute(
|
|
||||||
"""
|
|
||||||
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
|
|
||||||
VALUES (%s, %s, %s, %s, %s, %s)
|
|
||||||
RETURNING id
|
|
||||||
""",
|
|
||||||
(summary_id, subject_raw, subject_norm, sender, sent_iso, body)
|
|
||||||
)
|
|
||||||
row = CUR.fetchone()
|
row = CUR.fetchone()
|
||||||
email_id = row[0] if row else None
|
email_id = row[0] if row else None
|
||||||
|
|
||||||
|
|
@ -1060,23 +980,6 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
|
||||||
return email_id
|
return email_id
|
||||||
|
|
||||||
|
|
||||||
def upgrade_usefulinfo_schema():
|
|
||||||
"""Ensure incremental-friendly schema (entry_id unique) is present."""
|
|
||||||
CON, CUR = db()
|
|
||||||
try:
|
|
||||||
try:
|
|
||||||
CUR.execute("ALTER TABLE useful_info_email ADD COLUMN IF NOT EXISTS entry_id TEXT")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
CUR.execute("CREATE UNIQUE INDEX IF NOT EXISTS useful_info_email_entry_id_idx ON useful_info_email(entry_id)")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
CON.commit()
|
|
||||||
finally:
|
|
||||||
CUR.close(); CON.close()
|
|
||||||
|
|
||||||
|
|
||||||
def link_emails_to_summary(subject_norm, summary_id):
|
def link_emails_to_summary(subject_norm, summary_id):
|
||||||
"""Link any emails with the given normalized subject to the provided summary.
|
"""Link any emails with the given normalized subject to the provided summary.
|
||||||
Returns number of rows updated.
|
Returns number of rows updated.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue