usefuleemails

parent 20366246db
commit d8a8714562

gpt.py | 144
@@ -426,134 +426,8 @@ def process_useful_info():
         return "\n\n".join(out_chunks)

     OUT_JSONL = Path("cache/useful_info_summaries.jsonl")

-    # --- PostgreSQL schema + insert helpers (via localcache2.db) ---
-    def _pg_init_schema():
-        try:
-            from localcache2 import db as _db
-            CON, CURSOR = _db()
-            try:
-                CURSOR.execute(
-                    """
-                    CREATE TABLE IF NOT EXISTS useful_info_summary (
-                        id BIGSERIAL PRIMARY KEY,
-                        summary_hash CHAR(64) UNIQUE,
-                        grp_index INTEGER,
-                        subject TEXT,
-                        thread_count INTEGER,
-                        source TEXT,
-                        date_label TEXT,
-                        tags_json JSONB,
-                        short_text TEXT,
-                        summary_text TEXT,
-                        attachments_json JSONB,
-                        raw_json TEXT,
-                        created_at TIMESTAMPTZ DEFAULT now()
-                    );
-                    """
-                )
-                CURSOR.execute(
-                    """
-                    CREATE TABLE IF NOT EXISTS useful_info_event (
-                        id BIGSERIAL PRIMARY KEY,
-                        summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
-                        dt TEXT,
-                        length TEXT,
-                        title TEXT,
-                        description TEXT,
-                        created_at TIMESTAMPTZ DEFAULT now()
-                    );
-                    """
-                )
-                CON.commit()
-            finally:
-                CURSOR.close()
-                CON.close()
-        except Exception as e:
-            print("[warn] could not init PostgreSQL schema:", e)
-
-    def _sha256(s):
-        import hashlib
-        return hashlib.sha256(s.encode('utf-8', 'ignore')).hexdigest()
-
-    def _pg_insert_summary_and_events(idx, subject, count, attachments, parsed, raw):
-        try:
-            from localcache2 import db as _db
-            import json as _json
-            CON, CURSOR = _db()
-            try:
-                source = None
-                date_label = None
-                tags = None
-                short_text = ''
-                summary_text = ''
-                events = []
-                if parsed:
-                    source = parsed.get('source')
-                    date_label = parsed.get('date')
-                    tags = parsed.get('tags')
-                    short_text = parsed.get('short') or ''
-                    summary_text = parsed.get('summary') or ''
-                    events = parsed.get('events') or []
-
-                s_hash = _sha256((subject or '') + "\n" + short_text + "\n" + summary_text)
-                CURSOR.execute(
-                    """
-                    INSERT INTO useful_info_summary
-                        (summary_hash, grp_index, subject, thread_count, source, date_label,
-                         tags_json, short_text, summary_text, attachments_json, raw_json)
-                    VALUES
-                        (%s, %s, %s, %s, %s, %s,
-                         CAST(%s AS JSONB), %s, %s, CAST(%s AS JSONB), %s)
-                    ON CONFLICT (summary_hash)
-                    DO UPDATE SET grp_index = EXCLUDED.grp_index
-                    RETURNING id
-                    """,
-                    (
-                        s_hash,
-                        idx,
-                        subject,
-                        count,
-                        source,
-                        date_label,
-                        _json.dumps(tags) if tags is not None else None,
-                        short_text,
-                        summary_text,
-                        _json.dumps(attachments) if attachments else None,
-                        raw,
-                    ),
-                )
-                row = CURSOR.fetchone()
-                summary_id = row[0] if row else None
-
-                if summary_id and isinstance(events, list):
-                    for e in events:
-                        try:
-                            CURSOR.execute(
-                                """
-                                INSERT INTO useful_info_event
-                                    (summary_id, dt, length, title, description)
-                                VALUES (%s, %s, %s, %s, %s)
-                                """,
-                                (
-                                    summary_id,
-                                    (e or {}).get('dt'),
-                                    (e or {}).get('length'),
-                                    (e or {}).get('title'),
-                                    (e or {}).get('description'),
-                                ),
-                            )
-                        except Exception:
-                            pass
-                CON.commit()
-            finally:
-                CURSOR.close()
-                CON.close()
-        except Exception as e:
-            print("[warn] PostgreSQL insert failed:", e)
-
-    # Ensure DB schema exists
-    _pg_init_schema()
+    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_record
+    init_usefulinfo_schema()

     def demo_f(idx, g):
         print(f"[{idx}] {g['subject']} (count: {g['count']})")
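# Note: the dedup key changes with this move. The removed helper hashed
# subject + short + summary, while localcache2.insert_usefulinfo_record
# (added below in this commit) hashes source + date_label + short + summary,
# so re-summarized threads dedup differently. A sketch with illustrative values:
import hashlib
def _key(*parts):
    return hashlib.sha256("\n".join(p or '' for p in parts).encode('utf-8', 'ignore')).hexdigest()
assert _key('subject', 'short', 'summary') != _key('source', 'date label', 'short', 'summary')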
@@ -580,15 +454,12 @@ def process_useful_info():
         with open(OUT_JSONL, "a", encoding="utf-8") as outf:
             outf.write(json.dumps(record, ensure_ascii=False) + "\n")

-        # Also persist to PostgreSQL using localcache2
-        if 'summary' in record:
-            _pg_insert_summary_and_events(
-                idx, record.get('subject'), record.get('count'), attach_paths, record['summary'], None
-            )
-        else:
-            _pg_insert_summary_and_events(
-                idx, record.get('subject'), record.get('count'), attach_paths, None, record.get('summary_raw')
-            )
+        # Also persist to PostgreSQL using localcache2 with only parsed JSON
+        try:
+            insert_usefulinfo_record(record['summary'])
+        except Exception as e:
+            print('[warn] DB insert failed:', e)

     for_each_group(
         log_path="cache/email_usefulinfo_sorted.txt",
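# For reference, a minimal sketch of the record being appended to OUT_JSONL
# (keys inferred from the code above; field values illustrative). Note that
# the new path persists only record['summary']: a record carrying only
# summary_raw (unparsed GPT output) raises KeyError and lands in the warn branch.
record = {
    'subject': 'Financial aid workshop',
    'count': 3,
    'summary': {
        'source': 'student services email',
        'date': '2024-09-03',
        'tags': ['financial aid'],
        'short': 'FAFSA workshop announced',
        'summary': 'Workshop on completing the FAFSA, with advising drop-ins.',
        'events': [],
    },
}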
@@ -740,6 +611,7 @@ if __name__ == "__main__":
         4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
         5: ['list faq mailbox', list_faq],
         6: ['process useful info msgs', process_useful_info],
+        7: ['export useful info events to .ics', lambda: (__import__('localcache2').export_usefulinfo_events_to_ics() or True)],
     }
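# Option 7 is equivalent to calling the exporter directly; with its default
# argument it writes cache/useful_info_events.ics. A sketch:
import localcache2
localcache2.export_usefulinfo_events_to_ics()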
localcache2.py | 319
@@ -1,6 +1,7 @@
 # Local data, saving and manipulating
 import util
 import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
+import hashlib
 import pandas as pd
 import numpy as np
 from collections import defaultdict
@@ -576,6 +577,322 @@ def iLearn_name_from_goo(goo):
     return cursor.fetchone()


+# -------------------- Useful Info (summaries, events, tags) --------------------
+
+def init_usefulinfo_schema():
+    """Create tables for summaries, events, tags, and link tables if missing."""
+    CON, CUR = db()
+    try:
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary (
+                id BIGSERIAL PRIMARY KEY,
+                summary_hash CHAR(64) UNIQUE,
+                source TEXT,
+                date_label TEXT,
+                short_text TEXT,
+                summary_text TEXT,
+                created_at TIMESTAMPTZ DEFAULT now()
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_event (
+                id BIGSERIAL PRIMARY KEY,
+                dt TEXT,
+                length TEXT,
+                title TEXT,
+                description TEXT,
+                created_at TIMESTAMPTZ DEFAULT now()
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary_event (
+                summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
+                event_id BIGINT NOT NULL REFERENCES useful_info_event(id) ON DELETE CASCADE,
+                PRIMARY KEY (summary_id, event_id)
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_tag (
+                id BIGSERIAL PRIMARY KEY,
+                name TEXT UNIQUE
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary_tag (
+                summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
+                tag_id BIGINT NOT NULL REFERENCES useful_info_tag(id) ON DELETE CASCADE,
+                PRIMARY KEY (summary_id, tag_id)
+            );
+            """
+        )
+        CON.commit()
+    finally:
+        CUR.close(); CON.close()
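# The two link tables make summary<->event and summary<->tag many-to-many.
# A read-back sketch against the DDL above (using the module's own db() helper):
CON, CUR = db()
try:
    CUR.execute("""
        SELECT s.id, s.short_text, array_agg(t.name)
        FROM useful_info_summary s
        LEFT JOIN useful_info_summary_tag st ON st.summary_id = s.id
        LEFT JOIN useful_info_tag t ON t.id = st.tag_id
        GROUP BY s.id
    """)
    for sid, short, tags in CUR.fetchall():
        print(sid, short, tags)
finally:
    CUR.close(); CON.close()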
+
+
+def _sha256(s):
+    return hashlib.sha256(s.encode('utf-8','ignore')).hexdigest()
+
+
+def _get_or_create_tag_id(CUR, name):
+    try:
+        CUR.execute("INSERT INTO useful_info_tag (name) VALUES (%s) ON CONFLICT (name) DO NOTHING RETURNING id", (name,))
+        row = CUR.fetchone()
+        if row and row[0]:
+            return row[0]
+    except Exception:
+        pass
+    CUR.execute("SELECT id FROM useful_info_tag WHERE name=%s", (name,))
+    row = CUR.fetchone()
+    return row[0] if row else None
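# Design note: ON CONFLICT (name) DO NOTHING RETURNING id yields no row when
# the tag already exists, so the trailing SELECT is what turns this into an
# "upsert then read". A usage sketch (assumes the schema is initialized):
CON, CUR = db()
try:
    a = _get_or_create_tag_id(CUR, 'deadline')   # first call: INSERT path
    b = _get_or_create_tag_id(CUR, 'deadline')   # second call: SELECT fallback
    assert a == b
    CON.commit()
finally:
    CUR.close(); CON.close()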
+
+
+def insert_usefulinfo_record(parsed):
+    """
+    Insert a summarize_u_info() JSON result into Postgres.
+    Expected keys: source, date, tags (list), short, summary, events (list of {dt,length,title,description}).
+    Dedups summaries using a stable hash; links tags and events via link tables.
+    Returns summary_id.
+    """
+    if not isinstance(parsed, dict):
+        return None
+    source = parsed.get('source')
+    date_label = parsed.get('date')
+    short_text = parsed.get('short') or ''
+    summary_text = parsed.get('summary') or ''
+    tags = parsed.get('tags') or []
+    events = parsed.get('events') or []
+
+    s_hash = _sha256((source or '') + "\n" + (date_label or '') + "\n" + short_text + "\n" + summary_text)
+
+    CON, CUR = db()
+    summary_id = None
+    try:
+        CUR.execute(
+            """
+            INSERT INTO useful_info_summary
+                (summary_hash, source, date_label, short_text, summary_text)
+            VALUES (%s, %s, %s, %s, %s)
+            ON CONFLICT (summary_hash)
+            DO UPDATE SET short_text=EXCLUDED.short_text, summary_text=EXCLUDED.summary_text
+            RETURNING id
+            """,
+            (s_hash, source, date_label, short_text, summary_text)
+        )
+        row = CUR.fetchone()
+        summary_id = row[0] if row else None
+
+        # Tags
+        if summary_id and isinstance(tags, list):
+            for t in tags:
+                if not t:
+                    continue
+                tag_id = _get_or_create_tag_id(CUR, str(t))
+                if tag_id:
+                    try:
+                        CUR.execute(
+                            "INSERT INTO useful_info_summary_tag (summary_id, tag_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
+                            (summary_id, tag_id)
+                        )
+                    except Exception:
+                        pass
+
+        # Events
+        if summary_id and isinstance(events, list):
+            for e in events:
+                try:
+                    CUR.execute(
+                        """
+                        INSERT INTO useful_info_event (dt, length, title, description)
+                        VALUES (%s, %s, %s, %s)
+                        RETURNING id
+                        """,
+                        (
+                            (e or {}).get('dt'),
+                            (e or {}).get('length'),
+                            (e or {}).get('title'),
+                            (e or {}).get('description'),
+                        )
+                    )
+                    evrow = CUR.fetchone()
+                    if evrow and evrow[0]:
+                        CUR.execute(
+                            "INSERT INTO useful_info_summary_event (summary_id, event_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
+                            (summary_id, evrow[0])
+                        )
+                except Exception:
+                    pass
+
+        CON.commit()
+    finally:
+        CUR.close(); CON.close()
+    return summary_id
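# A minimal call sketch, shaped per the docstring (values illustrative):
sample = {
    'source': 'campus newsletter',
    'date': 'Sep 3 2024',
    'tags': ['deadline', 'financial aid'],
    'short': 'FAFSA priority deadline',
    'summary': 'Priority FAFSA filing deadline announced for mid-September.',
    'events': [{'dt': 'Sep 15 2024 5pm', 'length': '1h',
                'title': 'FAFSA deadline', 'description': 'Priority filing cutoff.'}],
}
sid = insert_usefulinfo_record(sample)
# A repeat call hits the ON CONFLICT upsert and returns the same summary id,
# though its events are re-inserted as new rows (events are not deduped).
assert insert_usefulinfo_record(sample) == sid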
+
+
+def export_usefulinfo_events_to_ics(filepath='cache/useful_info_events.ics'):
+    """Export events from useful info tables to an .ics file.
+
+    - Attempts to parse dt and length into DTSTART/DTEND.
+    - Includes title (SUMMARY), description (DESCRIPTION), and tags (CATEGORIES).
+    """
+    from datetime import datetime, timedelta
+    from dateutil import parser as dtparser
+
+    CON, CUR = db()
+    try:
+        # Pull events with linked summary and aggregated tags
+        CUR.execute(
+            """
+            SELECT e.id, e.dt, e.length, e.title, e.description,
+                   s.source, s.date_label, s.short_text,
+                   COALESCE(array_agg(t.name) FILTER (WHERE t.name IS NOT NULL), '{}') AS tags
+            FROM useful_info_event e
+            JOIN useful_info_summary_event se ON se.event_id = e.id
+            JOIN useful_info_summary s ON s.id = se.summary_id
+            LEFT JOIN useful_info_summary_tag st ON st.summary_id = s.id
+            LEFT JOIN useful_info_tag t ON t.id = st.tag_id
+            GROUP BY e.id, s.source, s.date_label, s.short_text
+            ORDER BY e.id
+            """
+        )
+        rows = CUR.fetchall()
+    finally:
+        CUR.close(); CON.close()
+
+    def _parse_minutes(length_str):
+        if not length_str:
+            return 60
+        try:
+            n = int(length_str)
+            if n <= 12:
+                return n * 60
+            return n
+        except Exception:
+            pass
+        m = re.findall(r"(\d+(?:\.\d+)?)\s*([hm])", length_str, flags=re.I)
+        minutes = 0
+        if m:
+            for num, unit in m:
+                try:
+                    val = float(num)
+                    if unit.lower() == 'h':
+                        minutes += int(val * 60)
+                    else:
+                        minutes += int(val)
+                except Exception:
+                    pass
+        if minutes > 0:
+            return minutes
+        return 60
+
+    def _has_time_component(s):
+        if not s:
+            return False
+        if re.search(r"\d\d?:\d\d", s):
+            return True
+        if re.search(r"\b(am|pm)\b", s, re.I):
+            return True
+        return False
+
+    def _format_dt(dtobj):
+        # Local time (floating) format
+        return dtobj.strftime('%Y%m%dT%H%M%S')
+
+    now_utc = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+
+    lines = []
+    lines.append('BEGIN:VCALENDAR')
+    lines.append('VERSION:2.0')
+    lines.append('PRODID:-//canvasapp//Useful Info Events//EN')
+    lines.append('CALSCALE:GREGORIAN')
+    lines.append('METHOD:PUBLISH')
+    lines.append('X-WR-CALNAME:Useful Info')
+
+    for r in rows:
+        ev_id = r[0]
+        dt_str = r[1]
+        length_str = r[2]
+        title = r[3] or ''
+        desc = r[4] or ''
+        source = r[5] or ''
+        date_label = r[6] or ''
+        short_text = r[7] or ''
+        tags = r[8] or []
+
+        # Try to parse DTSTART/DTEND
+        all_day = not _has_time_component(str(dt_str))
+        dtstart = None
+        dtend = None
+        try:
+            if dt_str:
+                parsed = dtparser.parse(str(dt_str), fuzzy=True)
+                if all_day:
+                    # All-day event
+                    dtstart = parsed.date()
+                    dtend = (parsed.date() + timedelta(days=1))
+                else:
+                    dtstart = parsed
+                    minutes = _parse_minutes(str(length_str))
+                    dtend = parsed + timedelta(minutes=minutes)
+        except Exception:
+            # If we cannot parse date, skip this event
+            continue
+
+        lines.append('BEGIN:VEVENT')
+        lines.append('UID:usefulinfo-event-%s@gavilan' % ev_id)
+        lines.append('DTSTAMP:%s' % now_utc)
+
+        if all_day and dtstart and dtend:
+            lines.append('DTSTART;VALUE=DATE:%s' % dtstart.strftime('%Y%m%d'))
+            lines.append('DTEND;VALUE=DATE:%s' % dtend.strftime('%Y%m%d'))
+        elif dtstart and dtend:
+            lines.append('DTSTART:%s' % _format_dt(dtstart))
+            lines.append('DTEND:%s' % _format_dt(dtend))
+
+        if title:
+            lines.append('SUMMARY:%s' % title.replace('\n', ' ').replace('\r', ' '))
+
+        full_desc = desc
+        extra = []
+        if short_text:
+            extra.append('Context: ' + short_text)
+        if source or date_label:
+            extra.append('Source: %s Date label: %s' % (source, date_label))
+        if extra:
+            if full_desc:
+                full_desc += '\n\n' + '\n'.join(extra)
+            else:
+                full_desc = '\n'.join(extra)
+        if full_desc:
+            # Basic escaping of commas/semicolons per RFC is often needed; we keep it simple here
+            lines.append('DESCRIPTION:%s' % full_desc.replace('\r', ' ').replace('\n', '\\n'))
+
+        if tags:
+            try:
+                cats = ','.join([t for t in tags if t])
+                if cats:
+                    lines.append('CATEGORIES:%s' % cats)
+            except Exception:
+                pass
+
+        lines.append('END:VEVENT')
+
+    lines.append('END:VCALENDAR')
+
+    # Write file
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write("\r\n".join(lines) + "\r\n")
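# Behavior sketch for the nested duration heuristic above (illustrative; the
# helpers would need to be hoisted to module scope to call them like this):
assert _parse_minutes('2') == 120        # small integers read as hours
assert _parse_minutes('90') == 90        # larger integers read as minutes
assert _parse_minutes('1.5h') == 90
assert _parse_minutes('1h 30m') == 90
assert _parse_minutes('no digits') == 60 # fallback default
# A dt string with no clock time ('Sep 15') fails _has_time_component and is
# emitted as an all-day VEVENT (DTSTART;VALUE=DATE); 'Sep 15 5pm' gets a timed
# DTSTART/DTEND pair whose span comes from _parse_minutes.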
+
+
+
 if __name__ == "__main__":
 
     print ('')
@@ -604,5 +921,3 @@ if __name__ == "__main__":
 
     # Call the function in the options dict
     options[ int(resp)][1]()
-
-
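# End to end, the flow this commit sets up is roughly: gpt.py option 6
# summarizes useful-info emails and persists parsed results through
# insert_usefulinfo_record; option 7 exports the accumulated events. A sketch
# of the equivalent direct calls:
import localcache2
localcache2.init_usefulinfo_schema()
localcache2.export_usefulinfo_events_to_ics('cache/useful_info_events.ics')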