Compare commits

...

2 Commits

2 changed files with 325 additions and 138 deletions

gpt.py (144 lines changed)

@@ -425,134 +425,8 @@ def process_useful_info():
return "\n\n".join(out_chunks)
OUT_JSONL = Path("cache/useful_info_summaries.jsonl")
# --- PostgreSQL schema + insert helpers (via localcache2.db) ---
def _pg_init_schema():
try:
from localcache2 import db as _db
CON, CURSOR = _db()
try:
CURSOR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_summary (
id BIGSERIAL PRIMARY KEY,
summary_hash CHAR(64) UNIQUE,
grp_index INTEGER,
subject TEXT,
thread_count INTEGER,
source TEXT,
date_label TEXT,
tags_json JSONB,
short_text TEXT,
summary_text TEXT,
attachments_json JSONB,
raw_json TEXT,
created_at TIMESTAMPTZ DEFAULT now()
);
"""
)
CURSOR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_event (
id BIGSERIAL PRIMARY KEY,
summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
dt TEXT,
length TEXT,
title TEXT,
description TEXT,
created_at TIMESTAMPTZ DEFAULT now()
);
"""
)
CON.commit()
finally:
CURSOR.close()
CON.close()
except Exception as e:
print("[warn] could not init PostgreSQL schema:", e)
def _sha256(s):
import hashlib
return hashlib.sha256(s.encode('utf-8', 'ignore')).hexdigest()
def _pg_insert_summary_and_events(idx, subject, count, attachments, parsed, raw):
try:
from localcache2 import db as _db
import json as _json
CON, CURSOR = _db()
try:
source = None
date_label = None
tags = None
short_text = ''
summary_text = ''
events = []
if parsed:
source = parsed.get('source')
date_label = parsed.get('date')
tags = parsed.get('tags')
short_text = parsed.get('short') or ''
summary_text = parsed.get('summary') or ''
events = parsed.get('events') or []
s_hash = _sha256((subject or '') + "\n" + short_text + "\n" + summary_text)
CURSOR.execute(
"""
INSERT INTO useful_info_summary
(summary_hash, grp_index, subject, thread_count, source, date_label,
tags_json, short_text, summary_text, attachments_json, raw_json)
VALUES
(%s, %s, %s, %s, %s, %s,
CAST(%s AS JSONB), %s, %s, CAST(%s AS JSONB), %s)
ON CONFLICT (summary_hash)
DO UPDATE SET grp_index = EXCLUDED.grp_index
RETURNING id
""",
(
s_hash,
idx,
subject,
count,
source,
date_label,
_json.dumps(tags) if tags is not None else None,
short_text,
summary_text,
_json.dumps(attachments) if attachments else None,
raw,
),
)
row = CURSOR.fetchone()
summary_id = row[0] if row else None
if summary_id and isinstance(events, list):
for e in events:
try:
CURSOR.execute(
"""
INSERT INTO useful_info_event
(summary_id, dt, length, title, description)
VALUES (%s, %s, %s, %s, %s)
""",
(
summary_id,
(e or {}).get('dt'),
(e or {}).get('length'),
(e or {}).get('title'),
(e or {}).get('description'),
),
)
except Exception:
pass
CON.commit()
finally:
CURSOR.close()
CON.close()
except Exception as e:
print("[warn] PostgreSQL insert failed:", e)
# Ensure DB schema exists
_pg_init_schema()
from localcache2 import init_usefulinfo_schema, insert_usefulinfo_record
init_usefulinfo_schema()
def demo_f(idx, g):
print(f"[{idx}] {g['subject']} (count: {g['count']})")
@@ -579,15 +453,12 @@ def process_useful_info():
with open(OUT_JSONL, "a", encoding="utf-8") as outf:
outf.write(json.dumps(record, ensure_ascii=False) + "\n")
# Also persist to PostgreSQL using localcache2
# Also persist to PostgreSQL using localcache2, passing only the parsed JSON
if 'summary' in record:
_pg_insert_summary_and_events(
idx, record.get('subject'), record.get('count'), attach_paths, record['summary'], None
)
else:
_pg_insert_summary_and_events(
idx, record.get('subject'), record.get('count'), attach_paths, None, record.get('summary_raw')
)
try:
insert_usefulinfo_record(record['summary'])
except Exception as e:
print('[warn] DB insert failed:', e)
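# For reference, record['summary'] here is the parsed JSON produced by
# summarize_u_info(); its expected shape (per the localcache2 docstring) is:
#   {"source": ..., "date": ..., "tags": [...], "short": ..., "summary": ...,
#    "events": [{"dt": ..., "length": ..., "title": ..., "description": ...}]}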
for_each_group(
log_path="cache/email_usefulinfo_sorted.txt",
@@ -739,6 +610,7 @@ if __name__ == "__main__":
4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
5: ['list faq mailbox', list_faq],
6: ['process useful info msgs', process_useful_info],
7: ['export useful info events to .ics', lambda: (__import__('localcache2').export_usefulinfo_events_to_ics() or True)],
}

localcache2.py

@@ -1,6 +1,7 @@
# Local data, saving and manipulating
import util
import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
import hashlib
import pandas as pd
import numpy as np
from collections import defaultdict
@@ -576,6 +577,322 @@ def iLearn_name_from_goo(goo):
return cursor.fetchone()
# -------------------- Useful Info (summaries, events, tags) --------------------
def init_usefulinfo_schema():
"""Create tables for summaries, events, tags, and link tables if missing."""
CON, CUR = db()
try:
CUR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_summary (
id BIGSERIAL PRIMARY KEY,
summary_hash CHAR(64) UNIQUE,
source TEXT,
date_label TEXT,
short_text TEXT,
summary_text TEXT,
created_at TIMESTAMPTZ DEFAULT now()
);
"""
)
CUR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_event (
id BIGSERIAL PRIMARY KEY,
dt TEXT,
length TEXT,
title TEXT,
description TEXT,
created_at TIMESTAMPTZ DEFAULT now()
);
"""
)
CUR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_summary_event (
summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
event_id BIGINT NOT NULL REFERENCES useful_info_event(id) ON DELETE CASCADE,
PRIMARY KEY (summary_id, event_id)
);
"""
)
CUR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_tag (
id BIGSERIAL PRIMARY KEY,
name TEXT UNIQUE
);
"""
)
CUR.execute(
"""
CREATE TABLE IF NOT EXISTS useful_info_summary_tag (
summary_id BIGINT NOT NULL REFERENCES useful_info_summary(id) ON DELETE CASCADE,
tag_id BIGINT NOT NULL REFERENCES useful_info_tag(id) ON DELETE CASCADE,
PRIMARY KEY (summary_id, tag_id)
);
"""
)
CON.commit()
finally:
CUR.close(); CON.close()
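# Schema sketch (as created above): summaries relate to events and tags
# many-to-many through the two link tables:
#   useful_info_summary --< useful_info_summary_event >-- useful_info_event
#   useful_info_summary --< useful_info_summary_tag   >-- useful_info_tag
# Both link tables cascade on delete, so dropping a summary removes its link
# rows but leaves the event and tag rows themselves in place.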
def _sha256(s):
return hashlib.sha256(s.encode('utf-8','ignore')).hexdigest()
def _get_or_create_tag_id(CUR, name):
try:
CUR.execute("INSERT INTO useful_info_tag (name) VALUES (%s) ON CONFLICT (name) DO NOTHING RETURNING id", (name,))
row = CUR.fetchone()
if row and row[0]:
return row[0]
except Exception:
pass
CUR.execute("SELECT id FROM useful_info_tag WHERE name=%s", (name,))
row = CUR.fetchone()
return row[0] if row else None
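# Note: when the tag already exists, "ON CONFLICT (name) DO NOTHING ...
# RETURNING id" returns no row (DO NOTHING skips the insert entirely), so the
# fallback SELECT above is what actually fetches the existing tag's id.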
def insert_usefulinfo_record(parsed):
"""
Insert a summarize_u_info() JSON result into Postgres.
Expected keys: source, date, tags (list), short, summary, events (list of {dt,length,title,description}).
Dedups summaries using a stable hash; links tags and events via link tables.
Returns summary_id.
"""
if not isinstance(parsed, dict):
return None
source = parsed.get('source')
date_label = parsed.get('date')
short_text = parsed.get('short') or ''
summary_text = parsed.get('summary') or ''
tags = parsed.get('tags') or []
events = parsed.get('events') or []
s_hash = _sha256((source or '') + "\n" + (date_label or '') + "\n" + short_text + "\n" + summary_text)
CON, CUR = db()
summary_id = None
try:
CUR.execute(
"""
INSERT INTO useful_info_summary
(summary_hash, source, date_label, short_text, summary_text)
VALUES (%s, %s, %s, %s, %s)
ON CONFLICT (summary_hash)
DO UPDATE SET short_text=EXCLUDED.short_text, summary_text=EXCLUDED.summary_text
RETURNING id
""",
(s_hash, source, date_label, short_text, summary_text)
)
row = CUR.fetchone()
summary_id = row[0] if row else None
# Tags
if summary_id and isinstance(tags, list):
for t in tags:
if not t:
continue
tag_id = _get_or_create_tag_id(CUR, str(t))
if tag_id:
try:
CUR.execute(
"INSERT INTO useful_info_summary_tag (summary_id, tag_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
(summary_id, tag_id)
)
except Exception:
pass
# Events
if summary_id and isinstance(events, list):
for e in events:
try:
CUR.execute(
"""
INSERT INTO useful_info_event (dt, length, title, description)
VALUES (%s, %s, %s, %s)
RETURNING id
""",
(
(e or {}).get('dt'),
(e or {}).get('length'),
(e or {}).get('title'),
(e or {}).get('description'),
)
)
evrow = CUR.fetchone()
if evrow and evrow[0]:
CUR.execute(
"INSERT INTO useful_info_summary_event (summary_id, event_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
(summary_id, evrow[0])
)
except Exception:
pass
CON.commit()
finally:
CUR.close(); CON.close()
return summary_id
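# Minimal usage sketch (illustrative payload; all field values are made up):
#
#   init_usefulinfo_schema()
#   sid = insert_usefulinfo_record({
#       'source': 'campus newsletter',
#       'date': 'Sep 2024',
#       'tags': ['deadline', 'financial aid'],
#       'short': 'FAFSA priority deadline reminder',
#       'summary': 'The FAFSA priority deadline is coming up; apply early.',
#       'events': [{'dt': 'Sep 15 2024 5pm', 'length': '1h',
#                   'title': 'FAFSA deadline', 'description': 'Submit by 5pm.'}],
#   })
#
# Re-running with the same payload hits the summary_hash conflict and returns
# the same id; note, however, that events have no dedup of their own, so each
# call appends fresh rows to useful_info_event.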
def export_usefulinfo_events_to_ics(filepath='cache/useful_info_events.ics'):
"""Export events from useful info tables to an .ics file.
- Attempts to parse dt and length into DTSTART/DTEND.
- Includes title (SUMMARY), description (DESCRIPTION), and tags (CATEGORIES).
"""
from datetime import datetime, timedelta
from dateutil import parser as dtparser
CON, CUR = db()
try:
# Pull events with linked summary and aggregated tags
CUR.execute(
"""
SELECT e.id, e.dt, e.length, e.title, e.description,
s.source, s.date_label, s.short_text,
COALESCE(array_agg(t.name) FILTER (WHERE t.name IS NOT NULL), '{}') AS tags
FROM useful_info_event e
JOIN useful_info_summary_event se ON se.event_id = e.id
JOIN useful_info_summary s ON s.id = se.summary_id
LEFT JOIN useful_info_summary_tag st ON st.summary_id = s.id
LEFT JOIN useful_info_tag t ON t.id = st.tag_id
GROUP BY e.id, s.source, s.date_label, s.short_text
ORDER BY e.id
"""
)
rows = CUR.fetchall()
finally:
CUR.close(); CON.close()
def _parse_minutes(length_str):
if not length_str:
return 60
try:
n = int(length_str)
if n <= 12:
return n * 60
return n
except Exception:
pass
m = re.findall(r"(\d+(?:\.\d+)?)\s*([hm])", length_str, flags=re.I)
minutes = 0
if m:
for num, unit in m:
try:
val = float(num)
if unit.lower() == 'h':
minutes += int(val * 60)
else:
minutes += int(val)
except Exception:
pass
if minutes > 0:
return minutes
return 60
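# Examples of the heuristic above (illustrative):
#   _parse_minutes('90')     -> 90    (bare number > 12 is taken as minutes)
#   _parse_minutes('2')      -> 120   (bare number <= 12 is taken as hours)
#   _parse_minutes('1h 30m') -> 90
#   _parse_minutes('1.5h')   -> 90
#   _parse_minutes('soon')   -> 60    (fallback default)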
def _has_time_component(s):
if not s:
return False
if re.search(r"\d\d?:\d\d", s):
return True
if re.search(r"\b(am|pm)\b", s, re.I):
return True
return False
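# e.g. _has_time_component('Sep 15 5:00pm') -> True
#      _has_time_component('Sep 15 5pm')    -> True (digit-anchored regex)
#      _has_time_component('Sep 15')        -> False (treated as all-day)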
def _format_dt(dtobj):
# Local time (floating) format
return dtobj.strftime('%Y%m%dT%H%M%S')
now_utc = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
lines = []
lines.append('BEGIN:VCALENDAR')
lines.append('VERSION:2.0')
lines.append('PRODID:-//canvasapp//Useful Info Events//EN')
lines.append('CALSCALE:GREGORIAN')
lines.append('METHOD:PUBLISH')
lines.append('X-WR-CALNAME:Useful Info')
for r in rows:
ev_id = r[0]
dt_str = r[1]
length_str = r[2]
title = r[3] or ''
desc = r[4] or ''
source = r[5] or ''
date_label = r[6] or ''
short_text = r[7] or ''
tags = r[8] or []
# Try to parse DTSTART/DTEND
all_day = not _has_time_component(str(dt_str))
dtstart = None
dtend = None
try:
if dt_str:
parsed = dtparser.parse(str(dt_str), fuzzy=True)
if all_day:
# All-day event
dtstart = parsed.date()
dtend = (parsed.date() + timedelta(days=1))
else:
dtstart = parsed
minutes = _parse_minutes(str(length_str))
dtend = parsed + timedelta(minutes=minutes)
except Exception:
# If we cannot parse the date, skip this event
continue
if not dtstart or not dtend:
# dt was empty or yielded nothing; skip rather than emit a VEVENT with no DTSTART
continue
lines.append('BEGIN:VEVENT')
lines.append('UID:usefulinfo-event-%s@gavilan' % ev_id)
lines.append('DTSTAMP:%s' % now_utc)
if all_day and dtstart and dtend:
lines.append('DTSTART;VALUE=DATE:%s' % dtstart.strftime('%Y%m%d'))
lines.append('DTEND;VALUE=DATE:%s' % dtend.strftime('%Y%m%d'))
elif dtstart and dtend:
lines.append('DTSTART:%s' % _format_dt(dtstart))
lines.append('DTEND:%s' % _format_dt(dtend))
if title:
lines.append('SUMMARY:%s' % title.replace('\n', ' ').replace('\r', ' '))
full_desc = desc
extra = []
if short_text:
extra.append('Context: ' + short_text)
if source or date_label:
extra.append('Source: %s Date label: %s' % (source, date_label))
if extra:
if full_desc:
full_desc += '\n\n' + '\n'.join(extra)
else:
full_desc = '\n'.join(extra)
if full_desc:
# RFC 5545 also requires escaping commas/semicolons in TEXT values; we keep it simple here and only escape newlines
lines.append('DESCRIPTION:%s' % full_desc.replace('\r', ' ').replace('\n', '\\n'))
if tags:
try:
cats = ','.join([t for t in tags if t])
if cats:
lines.append('CATEGORIES:%s' % cats)
except Exception:
pass
lines.append('END:VEVENT')
lines.append('END:VCALENDAR')
# Write file
with open(filepath, 'w', encoding='utf-8') as f:
f.write("\r\n".join(lines) + "\r\n")
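# Usage sketch: export_usefulinfo_events_to_ics() writes the default
# cache/useful_info_events.ics; pass a path to write elsewhere. Events whose
# dt cannot be parsed are skipped, and timed events are written as floating
# local times (no TZID or Z suffix), so calendar apps interpret them in the
# viewer's own timezone.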
if __name__ == "__main__":
print('')
@@ -604,5 +921,3 @@ if __name__ == "__main__":
# Call the function in the options dict
options[int(resp)][1]()