From d8a8714562387bbb1c37395295744a135e9e48fe Mon Sep 17 00:00:00 2001
From: Peter Howell
Date: Fri, 29 Aug 2025 20:51:03 +0000
Subject: [PATCH] useful emails: move useful-info persistence into localcache2,
 add .ics export

---
 gpt.py         | 144 ++--------------------
 localcache2.py | 321 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 327 insertions(+), 138 deletions(-)

diff --git a/gpt.py b/gpt.py
index 43d3254..97fb9c1 100644
--- a/gpt.py
+++ b/gpt.py
@@ -426,134 +426,8 @@ def process_useful_info():
         return "\n\n".join(out_chunks)
 
     OUT_JSONL = Path("cache/useful_info_summaries.jsonl")
-
-    # --- PostgreSQL schema + insert helpers (via localcache2.db) ---
-    def _pg_init_schema():
-        try:
-            from localcache2 import db as _db
-            CON, CURSOR = _db()
-            try:
-                CURSOR.execute(
-                    """
-                    CREATE TABLE IF NOT EXISTS useful_info_summary (
-                        id BIGSERIAL PRIMARY KEY,
-                        summary_hash CHAR(64) UNIQUE,
-                        grp_index INTEGER,
-                        subject TEXT,
-                        thread_count INTEGER,
-                        source TEXT,
-                        date_label TEXT,
-                        tags_json JSONB,
-                        short_text TEXT,
-                        summary_text TEXT,
-                        attachments_json JSONB,
-                        raw_json TEXT,
-                        created_at TIMESTAMPTZ DEFAULT now()
-                    );
-                    """
-                )
-                CURSOR.execute(
-                    """
-                    CREATE TABLE IF NOT EXISTS useful_info_event (
-                        id BIGSERIAL PRIMARY KEY,
-                        summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
-                        dt TEXT,
-                        length TEXT,
-                        title TEXT,
-                        description TEXT,
-                        created_at TIMESTAMPTZ DEFAULT now()
-                    );
-                    """
-                )
-                CON.commit()
-            finally:
-                CURSOR.close()
-                CON.close()
-        except Exception as e:
-            print("[warn] could not init PostgreSQL schema:", e)
-
-    def _sha256(s):
-        import hashlib
-        return hashlib.sha256(s.encode('utf-8', 'ignore')).hexdigest()
-
-    def _pg_insert_summary_and_events(idx, subject, count, attachments, parsed, raw):
-        try:
-            from localcache2 import db as _db
-            import json as _json
-            CON, CURSOR = _db()
-            try:
-                source = None
-                date_label = None
-                tags = None
-                short_text = ''
-                summary_text = ''
-                events = []
-                if parsed:
-                    source = parsed.get('source')
-                    date_label = parsed.get('date')
-                    tags = parsed.get('tags')
-                    short_text = parsed.get('short') or ''
-                    summary_text = parsed.get('summary') or ''
-                    events = parsed.get('events') or []
-
-                s_hash = _sha256((subject or '') + "\n" + short_text + "\n" + summary_text)
-                CURSOR.execute(
-                    """
-                    INSERT INTO useful_info_summary
-                        (summary_hash, grp_index, subject, thread_count, source, date_label,
-                         tags_json, short_text, summary_text, attachments_json, raw_json)
-                    VALUES
-                        (%s, %s, %s, %s, %s, %s,
-                         CAST(%s AS JSONB), %s, %s, CAST(%s AS JSONB), %s)
-                    ON CONFLICT (summary_hash)
-                    DO UPDATE SET grp_index = EXCLUDED.grp_index
-                    RETURNING id
-                    """,
-                    (
-                        s_hash,
-                        idx,
-                        subject,
-                        count,
-                        source,
-                        date_label,
-                        _json.dumps(tags) if tags is not None else None,
-                        short_text,
-                        summary_text,
-                        _json.dumps(attachments) if attachments else None,
-                        raw,
-                    ),
-                )
-                row = CURSOR.fetchone()
-                summary_id = row[0] if row else None
-
-                if summary_id and isinstance(events, list):
-                    for e in events:
-                        try:
-                            CURSOR.execute(
-                                """
-                                INSERT INTO useful_info_event
-                                    (summary_id, dt, length, title, description)
-                                VALUES (%s, %s, %s, %s, %s)
-                                """,
-                                (
-                                    summary_id,
-                                    (e or {}).get('dt'),
-                                    (e or {}).get('length'),
-                                    (e or {}).get('title'),
-                                    (e or {}).get('description'),
-                                ),
-                            )
-                        except Exception:
-                            pass
-                CON.commit()
-            finally:
-                CURSOR.close()
-                CON.close()
-        except Exception as e:
-            print("[warn] PostgreSQL insert failed:", e)
-
-    # Ensure DB schema exists
-    _pg_init_schema()
+    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_record
+    init_usefulinfo_schema()
 
     def demo_f(idx, g):
        print(f"[{idx}] {g['subject']} (count: {g['count']})")
@@ -580,15 +454,12 @@ def process_useful_info():
         with open(OUT_JSONL, "a", encoding="utf-8") as outf:
             outf.write(json.dumps(record, ensure_ascii=False) + "\n")
 
-        # Also persist to PostgreSQL using localcache2
+        # Also persist to PostgreSQL using localcache2, passing only the parsed JSON
         if 'summary' in record:
-            _pg_insert_summary_and_events(
-                idx, record.get('subject'), record.get('count'), attach_paths, record['summary'], None
-            )
-        else:
-            _pg_insert_summary_and_events(
-                idx, record.get('subject'), record.get('count'), attach_paths, None, record.get('summary_raw')
-            )
+            try:
+                insert_usefulinfo_record(record['summary'])
+            except Exception as e:
+                print('[warn] DB insert failed:', e)
 
     for_each_group(
         log_path="cache/email_usefulinfo_sorted.txt",
@@ -740,6 +611,7 @@ if __name__ == "__main__":
         4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
         5: ['list faq mailbox', list_faq],
         6: ['process useful info msgs', process_useful_info],
+        7: ['export useful info events to .ics', lambda: (__import__('localcache2').export_usefulinfo_events_to_ics() or True)],
     }

diff --git a/localcache2.py b/localcache2.py
index 1c520de..d2c85a8 100644
--- a/localcache2.py
+++ b/localcache2.py
@@ -1,6 +1,7 @@
 # Local data, saving and manipulating
 import util
 import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
+import hashlib
 import pandas as pd
 import numpy as np
 from collections import defaultdict
@@ -576,6 +577,324 @@ def iLearn_name_from_goo(goo):
     return cursor.fetchone()
 
 
+# -------------------- Useful Info (summaries, events, tags) --------------------
+
+def init_usefulinfo_schema():
+    """Create tables for summaries, events, tags, and link tables if missing."""
+    CON, CUR = db()
+    try:
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary (
+                id BIGSERIAL PRIMARY KEY,
+                summary_hash CHAR(64) UNIQUE,
+                source TEXT,
+                date_label TEXT,
+                short_text TEXT,
+                summary_text TEXT,
+                created_at TIMESTAMPTZ DEFAULT now()
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_event (
+                id BIGSERIAL PRIMARY KEY,
+                dt TEXT,
+                length TEXT,
+                title TEXT,
+                description TEXT,
+                created_at TIMESTAMPTZ DEFAULT now()
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary_event (
+                summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
+                event_id BIGINT NOT NULL REFERENCES useful_info_event(id) ON DELETE CASCADE,
+                PRIMARY KEY (summary_id, event_id)
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_tag (
+                id BIGSERIAL PRIMARY KEY,
+                name TEXT UNIQUE
+            );
+            """
+        )
+        CUR.execute(
+            """
+            CREATE TABLE IF NOT EXISTS useful_info_summary_tag (
+                summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
+                tag_id BIGINT NOT NULL REFERENCES useful_info_tag(id) ON DELETE CASCADE,
+                PRIMARY KEY (summary_id, tag_id)
+            );
+            """
+        )
+        CON.commit()
+    finally:
+        CUR.close(); CON.close()
+
+
+def _sha256(s):
+    return hashlib.sha256(s.encode('utf-8', 'ignore')).hexdigest()
+
+
+def _get_or_create_tag_id(CUR, name):
+    try:
+        CUR.execute("INSERT INTO useful_info_tag (name) VALUES (%s) ON CONFLICT (name) DO NOTHING RETURNING id", (name,))
+        row = CUR.fetchone()
+        if row and row[0]:
+            return row[0]
+    except Exception:
+        pass
+    CUR.execute("SELECT id FROM useful_info_tag WHERE name=%s", (name,))
+    row = CUR.fetchone()
+    return row[0] if row else None
+
+
+def insert_usefulinfo_record(parsed):
+    """
+    Insert a summarize_u_info() JSON result into Postgres.
+    Expected keys: source, date, tags (list), short, summary,
+    events (list of {dt, length, title, description}).
+    Dedupes summaries with a stable hash; links tags and events via the link tables.
+    Returns the summary id (new or existing).
+    """
+    if not isinstance(parsed, dict):
+        return None
+    source = parsed.get('source')
+    date_label = parsed.get('date')
+    short_text = parsed.get('short') or ''
+    summary_text = parsed.get('summary') or ''
+    tags = parsed.get('tags') or []
+    events = parsed.get('events') or []
+
+    s_hash = _sha256((source or '') + "\n" + (date_label or '') + "\n" + short_text + "\n" + summary_text)
+
+    CON, CUR = db()
+    summary_id = None
+    try:
+        CUR.execute(
+            """
+            INSERT INTO useful_info_summary
+                (summary_hash, source, date_label, short_text, summary_text)
+            VALUES (%s, %s, %s, %s, %s)
+            ON CONFLICT (summary_hash)
+            DO UPDATE SET short_text = EXCLUDED.short_text, summary_text = EXCLUDED.summary_text
+            RETURNING id
+            """,
+            (s_hash, source, date_label, short_text, summary_text)
+        )
+        row = CUR.fetchone()
+        summary_id = row[0] if row else None
+
+        # Tags
+        if summary_id and isinstance(tags, list):
+            for t in tags:
+                if not t:
+                    continue
+                tag_id = _get_or_create_tag_id(CUR, str(t))
+                if tag_id:
+                    try:
+                        CUR.execute(
+                            "INSERT INTO useful_info_summary_tag (summary_id, tag_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
+                            (summary_id, tag_id)
+                        )
+                    except Exception:
+                        pass
+
+        # Events
+        if summary_id and isinstance(events, list):
+            for e in events:
+                try:
+                    CUR.execute(
+                        """
+                        INSERT INTO useful_info_event (dt, length, title, description)
+                        VALUES (%s, %s, %s, %s)
+                        RETURNING id
+                        """,
+                        (
+                            (e or {}).get('dt'),
+                            (e or {}).get('length'),
+                            (e or {}).get('title'),
+                            (e or {}).get('description'),
+                        )
+                    )
+                    evrow = CUR.fetchone()
+                    if evrow and evrow[0]:
+                        CUR.execute(
+                            "INSERT INTO useful_info_summary_event (summary_id, event_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
+                            (summary_id, evrow[0])
+                        )
+                except Exception:
+                    pass
+
+        CON.commit()
+    finally:
+        CUR.close(); CON.close()
+    return summary_id
+
+
+def export_usefulinfo_events_to_ics(filepath='cache/useful_info_events.ics'):
+    """Export events from the useful info tables to an .ics file.
+
+    - Attempts to parse dt and length into DTSTART/DTEND.
+    - Includes title (SUMMARY), description (DESCRIPTION), and tags (CATEGORIES).
+ """ + from datetime import datetime, timedelta + from dateutil import parser as dtparser + + CON, CUR = db() + try: + # Pull events with linked summary and aggregated tags + CUR.execute( + """ + SELECT e.id, e.dt, e.length, e.title, e.description, + s.source, s.date_label, s.short_text, + COALESCE(array_agg(t.name) FILTER (WHERE t.name IS NOT NULL), '{}') AS tags + FROM useful_info_event e + JOIN useful_info_summary_event se ON se.event_id = e.id + JOIN useful_info_summary s ON s.id = se.summary_id + LEFT JOIN useful_info_summary_tag st ON st.summary_id = s.id + LEFT JOIN useful_info_tag t ON t.id = st.tag_id + GROUP BY e.id, s.source, s.date_label, s.short_text + ORDER BY e.id + """ + ) + rows = CUR.fetchall() + finally: + CUR.close(); CON.close() + + def _parse_minutes(length_str): + if not length_str: + return 60 + try: + n = int(length_str) + if n <= 12: + return n * 60 + return n + except Exception: + pass + m = re.findall(r"(\d+(?:\.\d+)?)\s*([hm])", length_str, flags=re.I) + minutes = 0 + if m: + for num, unit in m: + try: + val = float(num) + if unit.lower() == 'h': + minutes += int(val * 60) + else: + minutes += int(val) + except Exception: + pass + if minutes > 0: + return minutes + return 60 + + def _has_time_component(s): + if not s: + return False + if re.search(r"\d\d?:\d\d", s): + return True + if re.search(r"\b(am|pm)\b", s, re.I): + return True + return False + + def _format_dt(dtobj): + # Local time (floating) format + return dtobj.strftime('%Y%m%dT%H%M%S') + + now_utc = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + + lines = [] + lines.append('BEGIN:VCALENDAR') + lines.append('VERSION:2.0') + lines.append('PRODID:-//canvasapp//Useful Info Events//EN') + lines.append('CALSCALE:GREGORIAN') + lines.append('METHOD:PUBLISH') + lines.append('X-WR-CALNAME:Useful Info') + + for r in rows: + ev_id = r[0] + dt_str = r[1] + length_str = r[2] + title = r[3] or '' + desc = r[4] or '' + source = r[5] or '' + date_label = r[6] or '' + short_text = r[7] or '' + tags = r[8] or [] + + # Try to parse DTSTART/DTEND + all_day = not _has_time_component(str(dt_str)) + dtstart = None + dtend = None + try: + if dt_str: + parsed = dtparser.parse(str(dt_str), fuzzy=True) + if all_day: + # All-day event + dtstart = parsed.date() + dtend = (parsed.date() + timedelta(days=1)) + else: + dtstart = parsed + minutes = _parse_minutes(str(length_str)) + dtend = parsed + timedelta(minutes=minutes) + except Exception: + # If we cannot parse date, skip this event + continue + + lines.append('BEGIN:VEVENT') + lines.append('UID:usefulinfo-event-%s@gavilan' % ev_id) + lines.append('DTSTAMP:%s' % now_utc) + + if all_day and dtstart and dtend: + lines.append('DTSTART;VALUE=DATE:%s' % dtstart.strftime('%Y%m%d')) + lines.append('DTEND;VALUE=DATE:%s' % dtend.strftime('%Y%m%d')) + elif dtstart and dtend: + lines.append('DTSTART:%s' % _format_dt(dtstart)) + lines.append('DTEND:%s' % _format_dt(dtend)) + + if title: + lines.append('SUMMARY:%s' % title.replace('\n', ' ').replace('\r', ' ')) + + full_desc = desc + extra = [] + if short_text: + extra.append('Context: ' + short_text) + if source or date_label: + extra.append('Source: %s Date label: %s' % (source, date_label)) + if extra: + if full_desc: + full_desc += '\n\n' + '\n'.join(extra) + else: + full_desc = '\n'.join(extra) + if full_desc: + # Basic escaping of commas/semicolons per RFC is often needed; we keep it simple here + lines.append('DESCRIPTION:%s' % full_desc.replace('\r', ' ').replace('\n', '\\n')) + + if tags: + try: + cats = ','.join([t for t in tags 
+                if cats:
+                    lines.append('CATEGORIES:%s' % cats)
+            except Exception:
+                pass
+
+        lines.append('END:VEVENT')
+
+    lines.append('END:VCALENDAR')
+
+    # Write the file (RFC 5545 expects CRLF line endings)
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write("\r\n".join(lines) + "\r\n")
+
+
 if __name__ == "__main__":
     print ('')
@@ -604,5 +923,3 @@ if __name__ == "__main__":
 
     # Call the function in the options dict
     options[ int(resp)][1]()
-
-
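--
Review notes (illustrative sketches, not part of the patch):

A minimal end-to-end exercise of the new localcache2 API, assuming db()
can reach Postgres; the record literal is made-up sample data in the shape
the insert_usefulinfo_record() docstring describes:

    from localcache2 import (init_usefulinfo_schema, insert_usefulinfo_record,
                             export_usefulinfo_events_to_ics)

    init_usefulinfo_schema()   # idempotent: all tables use CREATE TABLE IF NOT EXISTS

    record = {
        'source': 'campus newsletter',
        'date': 'Aug 2025',
        'tags': ['workshop', 'deadline'],
        'short': 'Fall workshop signups open',
        'summary': 'Signups for the fall workshop series are open through September.',
        'events': [{'dt': 'Sep 5 2025 2:00pm', 'length': '1h30m',
                    'title': 'Workshop kickoff', 'description': 'Room L101'}],
    }
    summary_id = insert_usefulinfo_record(record)   # returns new or existing summary id
    export_usefulinfo_events_to_ics()               # writes cache/useful_info_events.ics

One behavior worth noting: re-inserting the same record dedupes the summary
via summary_hash, but useful_info_event has no uniqueness constraint, so each
re-run appends duplicate event rows (and link rows) for that summary.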
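The DESCRIPTION comment above deliberately skips full RFC 5545 escaping. A
hedged sketch of what a fuller escape could look like (hypothetical helper,
not in the patch; the RFC also wants output lines folded at 75 octets, which
this skips):

    def ics_escape(text):
        # RFC 5545 3.3.11: escape backslash first, then semicolon, comma, newlines
        return (text.replace('\\', '\\\\')
                    .replace(';', '\\;')
                    .replace(',', '\\,')
                    .replace('\r\n', '\\n')
                    .replace('\n', '\\n')
                    .replace('\r', '\\n'))

    print(ics_escape('Food, drinks; RSVP'))   # -> Food\, drinks\; RSVP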
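And a quick stdlib-only sanity check of the generated calendar, assuming the
export ran with its default path; with the skip guard above, every emitted
VEVENT should carry a DTSTART:

    from pathlib import Path

    text = Path('cache/useful_info_events.ics').read_text(encoding='utf-8')
    events = text.split('BEGIN:VEVENT')[1:]          # drop the VCALENDAR header
    print('events:', len(events))
    print('missing DTSTART:', sum('DTSTART' not in ev for ev in events))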