Compare commits
No commits in common. "cec2c83cb4edcebf5613887506eee88f09012197" and "fee99be4a14e2dd801f9b09f6048830c454829f1" have entirely different histories.
cec2c83cb4
...
fee99be4a1
50
gpt.py
50
gpt.py
|
|
@ -209,31 +209,7 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
|||
# Collect first to a list so we can sort/group
|
||||
records = []
|
||||
items = uinfo.Items
|
||||
|
||||
# Incremental restrict by last seen sent time (with 2-day backoff), and schema upgrade for entry_id
|
||||
try:
|
||||
from localcache2 import db as _db, upgrade_usefulinfo_schema
|
||||
upgrade_usefulinfo_schema()
|
||||
CON, CUR = _db()
|
||||
last_iso = None
|
||||
try:
|
||||
CUR.execute("SELECT MAX(sent_iso) FROM useful_info_email")
|
||||
row = CUR.fetchone()
|
||||
if row and row[0]:
|
||||
last_iso = row[0]
|
||||
finally:
|
||||
CUR.close(); CON.close()
|
||||
if last_iso:
|
||||
from dateutil import parser as _p
|
||||
from datetime import timedelta
|
||||
dt = _p.parse(str(last_iso)) - timedelta(days=2)
|
||||
start_str = dt.strftime("%m/%d/%Y %I:%M %p")
|
||||
items = items.Restrict(f"[ReceivedTime] >= '{start_str}'")
|
||||
except Exception as ex_restrict:
|
||||
# If anything fails, fall back to full set
|
||||
pass
|
||||
|
||||
# Sort by SentOn ascending inside Outlook (helps performance for big folders)
|
||||
# Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
|
||||
try:
|
||||
items.Sort("[SentOn]", True) # True => ascending
|
||||
except Exception:
|
||||
|
|
@ -259,11 +235,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
|||
|
||||
sent_on = getattr(message, "SentOn", None)
|
||||
sent_iso = iso(sent_on)
|
||||
entry_id = None
|
||||
try:
|
||||
entry_id = getattr(message, 'EntryID', None)
|
||||
except Exception:
|
||||
entry_id = None
|
||||
|
||||
attachments_saved = []
|
||||
if save_attachments:
|
||||
|
|
@ -281,29 +252,12 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
|||
except Exception:
|
||||
body = ""
|
||||
|
||||
# Decode Outlook SafeLinks inside body text (basic replacement)
|
||||
try:
|
||||
import urllib.parse as _up
|
||||
def _decode_match(m):
|
||||
u = m.group(0)
|
||||
try:
|
||||
q = _up.urlparse(u).query
|
||||
params = _up.parse_qs(q)
|
||||
real = params.get('url', [''])[0] or params.get('target', [''])[0]
|
||||
return _up.unquote(real) if real else u
|
||||
except Exception:
|
||||
return u
|
||||
body = re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, body)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
records.append({
|
||||
"subject_norm": subj_norm,
|
||||
"subject_raw": subj_raw,
|
||||
"sender": sender,
|
||||
"sent_on": sent_on,
|
||||
"sent_iso": sent_iso,
|
||||
"entry_id": entry_id,
|
||||
"attachments": attachments_saved,
|
||||
"body": body,
|
||||
})
|
||||
|
|
@ -411,7 +365,6 @@ def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
|
|||
body=rec['body'],
|
||||
attachments=atts,
|
||||
summary_id=None,
|
||||
entry_id=rec.get('entry_id')
|
||||
)
|
||||
except Exception as e:
|
||||
print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))
|
||||
|
|
@ -831,7 +784,6 @@ if __name__ == "__main__":
|
|||
5: ['list faq mailbox', list_faq],
|
||||
6: ['process useful info msgs', process_useful_info],
|
||||
7: ['export useful info events to .ics', lambda: (__import__('localcache2').localcache2.export_usefulinfo_events_to_ics() or True)],
|
||||
8: ['fix safelinks in DB', lambda: (__import__('localcache2').localcache2.fix_safelinks_in_db() or True)],
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -640,7 +640,6 @@ def init_usefulinfo_schema():
|
|||
CREATE TABLE IF NOT EXISTS useful_info_email (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
summary_id BIGINT NULL REFERENCES useful_info_summary(id) ON DELETE SET NULL,
|
||||
entry_id TEXT UNIQUE,
|
||||
subject_raw TEXT,
|
||||
subject_norm TEXT,
|
||||
sender TEXT,
|
||||
|
|
@ -923,74 +922,7 @@ def insert_usefulinfo_record(parsed):
|
|||
return summary_id
|
||||
|
||||
|
||||
def _decode_safelinks_text(text):
|
||||
"""Replace Outlook SafeLinks with their original URL inside given text.
|
||||
Returns modified text (or original if no changes).
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
try:
|
||||
import urllib.parse as _up
|
||||
import re as _re
|
||||
def _decode_match(m):
|
||||
u = m.group(0)
|
||||
try:
|
||||
q = _up.urlparse(u).query
|
||||
params = _up.parse_qs(q)
|
||||
real = params.get('url', [''])[0] or params.get('target', [''])[0]
|
||||
return _up.unquote(real) if real else u
|
||||
except Exception:
|
||||
return u
|
||||
return _re.sub(r"https?://[\w\.-]*safelinks\.protection\.outlook\.com/[^\s\)\]\>]+", _decode_match, text)
|
||||
except Exception:
|
||||
return text
|
||||
|
||||
|
||||
def fix_safelinks_in_db(batch_size=500):
    """Decode SafeLinks URLs in existing DB rows for:
    - useful_info_email.body
    - useful_info_summary.summary_text
    - useful_info_attachment.text

    batch_size: number of UPDATE statements issued between commits. Anything
        that is not a positive int falls back to 500.

    Returns a dict of counts updated, keyed 'email_body', 'summary_text'
    and 'attachment_text'.
    """
    # (result key, SELECT yielding (id, text), UPDATE taking (new_text, id)).
    # The SQL is kept as complete literals so no statement is string-built.
    targets = (
        ('email_body',
         "SELECT id, body FROM useful_info_email",
         "UPDATE useful_info_email SET body=%s WHERE id=%s"),
        ('summary_text',
         "SELECT id, summary_text FROM useful_info_summary",
         "UPDATE useful_info_summary SET summary_text=%s WHERE id=%s"),
        ('attachment_text',
         "SELECT id, text FROM useful_info_attachment WHERE text IS NOT NULL",
         "UPDATE useful_info_attachment SET text=%s WHERE id=%s"),
    )
    if isinstance(batch_size, int) and batch_size > 0:
        commit_every = batch_size
    else:
        commit_every = 500

    CON, CUR = db()
    updated = {key: 0 for key, _, _ in targets}
    try:
        for key, select_sql, update_sql in targets:
            CUR.execute(select_sql)
            # Materialise the result first: the same cursor is reused for
            # the UPDATE statements below.
            rows = CUR.fetchall()
            pending = 0
            for rid, old in rows:
                new = _decode_safelinks_text(old)
                if new == old:
                    continue
                CUR.execute(update_sql, (new, rid))
                updated[key] += 1
                pending += 1
                if pending >= commit_every:
                    # Commit in batches so one huge transaction is not held open.
                    CON.commit()
                    pending = 0
            CON.commit()
    finally:
        CUR.close(); CON.close()
    print('[usefulinfo][fix_safelinks] updated:', updated)
    return updated
|
||||
|
||||
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None, entry_id=None):
|
||||
def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, attachments=None, summary_id=None):
|
||||
"""Insert an original email and any attachments.
|
||||
attachments: list of dicts like {'path': str, 'text': str or None}
|
||||
summary_id: optional FK to useful_info_summary; can be None and linked later.
|
||||
|
|
@ -1000,18 +932,6 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
|
|||
CON, CUR = db()
|
||||
email_id = None
|
||||
try:
|
||||
try:
|
||||
CUR.execute(
|
||||
"""
|
||||
INSERT INTO useful_info_email (summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (entry_id) DO NOTHING
|
||||
RETURNING id
|
||||
""",
|
||||
(summary_id, entry_id, subject_raw, subject_norm, sender, sent_iso, body)
|
||||
)
|
||||
except Exception:
|
||||
# Fallback if entry_id column not present
|
||||
CUR.execute(
|
||||
"""
|
||||
INSERT INTO useful_info_email (summary_id, subject_raw, subject_norm, sender, sent_iso, body)
|
||||
|
|
@ -1060,23 +980,6 @@ def insert_usefulinfo_email(subject_raw, subject_norm, sender, sent_iso, body, a
|
|||
return email_id
|
||||
|
||||
|
||||
def upgrade_usefulinfo_schema():
    """Ensure incremental-friendly schema (entry_id unique) is present.

    Adds the ``entry_id`` column to ``useful_info_email`` and a unique index
    on it. Each statement is attempted and committed on its own. A statement
    that fails is rolled back and reported on stdout instead of raising, so
    the upgrade stays best-effort.
    """
    statements = (
        "ALTER TABLE useful_info_email ADD COLUMN IF NOT EXISTS entry_id TEXT",
        "CREATE UNIQUE INDEX IF NOT EXISTS useful_info_email_entry_id_idx ON useful_info_email(entry_id)",
    )
    CON, CUR = db()
    try:
        for sql in statements:
            try:
                CUR.execute(sql)
                CON.commit()
            except Exception as e:
                # NOTE(review): the SQL dialect looks like PostgreSQL, where a
                # failed statement leaves the transaction aborted. Roll back
                # so the next statement starts clean, and report the failure
                # instead of hiding it.
                CON.rollback()
                print('[usefulinfo][schema-upgrade-failed]', sql, str(e))
    finally:
        CUR.close(); CON.close()
|
||||
|
||||
|
||||
def link_emails_to_summary(subject_norm, summary_id):
|
||||
"""Link any emails with the given normalized subject to the provided summary.
|
||||
Returns number of rows updated.
|
||||
|
|
|
|||
Loading…
Reference in New Issue