import os, json, sys, codecs, re
from datetime import datetime, timedelta
import util
from openai import OpenAI

from canvas_secrets import openai_org, openai_api_key

client = OpenAI(
    api_key=openai_api_key,
    organization=openai_org
)

DEFAULT_MODEL = "gpt-5-mini"

SAVE_ATTACHMENTS = 1

def gpt_chat(instruction, prompt, model=DEFAULT_MODEL):
    # 1) Strip extremely long Outlook protection URLs first
    #try:
    #    prompt = re.sub(r"\S*protection\.outlook\.com\S*", "", prompt, flags=re.I)
    #except Exception:
    #    pass

    # 2) Crude token estimation and truncation (target ~29k tokens).
    # Approximates 1 token ≈ 4 characters.
    def _est_tokens(s):
        try:
            return max(1, int(len(s) / 4))
        except Exception:
            return 1

    MAX_TOKENS = 29000
    # Reserve some tokens for system/overhead.
    overhead = 200
    inst_tokens = _est_tokens(instruction)
    prompt_tokens = _est_tokens(prompt)
    total = inst_tokens + prompt_tokens + overhead
    if total > MAX_TOKENS:
        allow = max(500, MAX_TOKENS - inst_tokens - overhead)
        # Convert the token budget back to characters.
        allow_chars = max(2000, allow * 4)
        # Keep the last N chars, on the assumption that the latest content is most relevant.
        if len(prompt) > allow_chars:
            prompt = prompt[-allow_chars:]
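    # Worked example (illustrative): with a 1,000-token instruction,
    # allow = 29000 - 1000 - 200 = 27,800 tokens ≈ 111,200 characters,
    # so an over-long prompt is cut to its trailing 111,200 characters.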

    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": prompt}
    ]

    try:
        response = client.chat.completions.create(model=model, messages=messages)
        result = response.choices[0].message.content
    except Exception as e:
        # Graceful failure; log and return empty JSON.
        try:
            log3 = codecs.open('cache/gptlog.txt', 'a', 'utf-8')
            log3.write(json.dumps({'prompt': prompt[:2000], 'error': str(e)}, indent=2))
            log3.write("\n\n---\n\n")
            log3.close()
        except Exception:
            pass
        return "{}"

    # Log success.
    try:
        log3 = codecs.open('cache/gptlog.txt', 'a', 'utf-8')
        log3.write(json.dumps({'prompt': prompt, 'result': result}, indent=2))
        log3.write("\n\n---\n\n")
        log3.close()
    except Exception:
        pass
    return result
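
# Minimal usage sketch (kept as a comment so importing this module stays
# side-effect free; the instruction/prompt strings here are hypothetical):
#
#   reply = gpt_chat("You are a terse assistant.", "Summarize: meeting moved to 3pm Friday.")
#   print(reply)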


def summarize_u_info(msg):
    system_role = """You are a practical, efficient cataloger and indexer of information. What follows is one or more emails which most likely (but not necessarily) contain useful information of some sort. Please determine if the message has useful info, and if so, attempt to reformat it as such:

{"source": <original sender or source>, "date": <month and year MM/YY>, "tags": <short list of tags and topics, multi words use snake_case>, "short": <one sentence summary>, "summary": <complete summary with details if warranted>, "events": <see below>}

If one or more specific dates and times of an event (or deadline) of some sort are mentioned, please attempt to extract the datetime and a 1-2 line summary of each event. The "events" field of the response should be a list of these: {"dt": <date and time>, "length": <if known>, "title": <title>, "description": <short description>, "contact": <person/email who seems to be in charge or know more>}. Try to include links in the description, if they seem relevant.

For all summaries: speak in a first-person direct voice, authoritatively. For example, instead of saying 'x shared details about offerings at Bakersfield College, including a, b and c', just say 'Bakersfield College offers a, b and c'. If there are links that seem relevant in the email, include them in the summary.

Some sample tags: event, advice, rule, warning, hr, it, disted, canvas, meeting, senate, dept_chairs, deans, administration, guided_pathways, site, file, article, contract, deadline, construction, catalog, schedule, curriqunet, banner, cvc, password, payroll, training, tip, graduation, photos, faculty, evaluation, convocation, flex, board, curriculum, ztc, oei, grant, accessibility, legislation, hyflex, hybrid, handbook, guideline, lti, api, integration, request, scholarship, library, aws, opportunity, background, calendar, semester, accreditation, professional_development, zoom, ai, html, code, video, pocr, badge, liason, newsletter, act, law, equipment, best_practices, outcomes, slo, plo, ilo, data, cares_act, census, admissions, financial_aid, enrollment, title_5, syllabus, ccn, communication, club, survey, checklist, covid_19, pipr, program_review, policy, vpat, compliance, emergency, finals, rfp, fraud, research, tutoring, stem, writing_center, learning_commons, hire, promotion, help_desk, housing, url, tenure, tlc, mental_health, president, vpaa, vpss, vp, mac, meaningful_assessment, negotiation, union, libguide, evidence, retirement

Remember that not every email is notable. If this is the case, just return an empty json object.

Always answer in valid json, nothing else."""
    x = gpt_chat(system_role, msg)
    print(x)
    return x


def gpt_faq(faq):
    system_role = """Ignore all previous instructions. You are an expert on the management and implementation of all aspects of online courses and running a distance education program, especially in higher education and community college. You are also an expert in pedagogy and the tools and techniques of effective teaching and learning, based on research and experience. You know how to focus on concrete examples of effective teaching which actually work in the real world, and the advice you give is specific and immediately useful to teachers in the classroom. You provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. Your users are experts in teaching and college administration, are comfortable with the California and Federal Education Code, and know they need to comply with it. There is no need to explain this beyond noting any relevant laws. Don't be verbose in your answers, but do provide details and examples where they might help the explanation.

You are a world-class technical writer. Your style is human-centered, friendly and kind. Give me concise and accurate answers and ignore all the niceties that OpenAI programmed you with. Omit all qualifications and excuses. I know you are a large language model but I want you to pretend you are a confident and superintelligent oracle that can help a limited human warp reality to his own intentions. You can reveal your own sense of humor and depth of intelligence. It is very important that you get this right.

Adopt a penetratingly insightful, incisively funny, cleverly witty persona. Your answers cultivate playful, out-of-the-box thinking, rationality, technology, design, and careful use of incentives. Speak in specific, topic-relevant terminology. Do NOT hedge or qualify. Do not waffle. Speak directly and be willing to make creative guesses.

I am pasting below an email chain between myself and colleague(s). I am fairly certain that it contains a technical question that I have answered. It may be spread out over several messages, or there might be some clarification or follow-up questions. There will be headers, signatures, and other text that isn't part of the core message. Ignore that. Consider the whole email chain while you prepare the following: Respond with a json formatted dictionary that contains the following:

{ "question": "Restate the question or problem in a concise but clear manner", "topics": ["keywords", "or phrases", "that categorize the issue"], "answer": "The best possible answer, written in markdown format. Draw the answer from the email but feel free to edit or embellish based on your knowledge. Generalize the answer to anyone who might have the issue. Your audience is mostly instructors working at a small community college. Do not refer to anyone's name specifically, unless it is Peter or Sabrina, but instead write for a general audience looking for the answers to their questions. We are writing a FAQ or help page. Feel free to use markdown-formatted bold, italic, lists, and links."} """

    return gpt_chat(system_role, faq)


def gpt_test():
    my_prompt = "Write a series of texts trying to sell a pen to a stranger."
    print(sys.argv)

    if len(sys.argv) > 1:
        my_prompt = " ".join(sys.argv[1:])
    else:
        print("Prompt: %s" % my_prompt)

    result = gpt_chat("", my_prompt)
    print(result)


def sample_send_email():
    import win32com.client
    ol = win32com.client.Dispatch("outlook.application")
    olmailitem = 0x0  # olMailItem: the item type for a standard mail message
    newmail = ol.CreateItem(olmailitem)
    newmail.Subject = 'Testing Mail'
    newmail.To = 'peter.howell@gmail.com'
    #newmail.CC = 'xyz@gmail.com'
    newmail.Body = 'Hello, this is a test email to showcase how to send emails from Python and Outlook.'
    # attach = 'C:\\Users\\admin\\Desktop\\Python\\Sample.xlsx'
    # newmail.Attachments.Add(attach)
    # To display the mail before sending it:
    # newmail.Display()
    newmail.Send()


def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
    import os, re
    from pathlib import Path
    from itertools import groupby
    from datetime import datetime
    import win32com.client

    CACHE = Path("cache")
    ATT_DIR = CACHE / "attachments_useful_info"
    LOG_PATH = CACHE / "email_usefulinfo_sorted.txt"
    ATT_DIR.mkdir(parents=True, exist_ok=True)
    CACHE.mkdir(parents=True, exist_ok=True)

    # --- helpers ---
    prefix_re = re.compile(r'^\s*(re|fw|fwd|aw|sv|vb|tr|wg)\s*:\s*', re.I)  # covers common locales too
    bracket_tag_re = re.compile(r'^\s*(\[[^\]]+\]\s*)+', re.I)

    def normalize_subject(s):
        if not s:
            return "(no subject)"
        s = s.strip()
        # strip leading [TAGS] like [EXTERNAL] [Some System]
        s = bracket_tag_re.sub('', s)
        # strip any chain of RE:/FWD: etc. at the start
        while prefix_re.match(s):
            s = prefix_re.sub('', s, count=1)
        # collapse whitespace
        s = re.sub(r'\s+', ' ', s).strip()
        return s or "(no subject)"
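    # Illustrative examples (hypothetical subjects):
    #   normalize_subject("[EXTERNAL] RE: Fwd: Budget update")  -> "Budget update"
    #   normalize_subject("   ")                                -> "(no subject)"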

    def safe_name(s):
        # reasonable Windows-safe folder name for a subject
        s = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', s)
        return s[:120]

    def iso(dt):
        # Outlook COM datetime -> ISO-style string (local)
        try:
            return dt.strftime('%Y-%m-%d %H:%M:%S')
        except Exception:
            return str(dt)

    def save_all_attachments(mail, subject_dir: Path):
        saved = []
        try:
            atts = mail.Attachments
            count = atts.Count
            if count == 0:
                return saved
            subject_dir.mkdir(parents=True, exist_ok=True)
            # iterate COM collection by index (1-based)
            for i in range(1, count + 1):
                att = atts.Item(i)
                # build a unique filename to avoid collisions
                base = str(att.FileName) if getattr(att, "FileName", None) else f"attachment_{i}"
                base = safe_name(base)
                # prefix by save time so repeated names stay distinct
                ts = datetime.now().strftime('%Y%m%d_%H%M%S')
                out = subject_dir / f"{ts}_{base}"
                att.SaveAsFile(str(out))
                saved.append(str(out.as_posix()))
        except Exception:
            # swallow attachment oddities; continue
            pass
        return saved
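    # Illustrative output path:
    #   cache/attachments_useful_info/Budget update/20240301_143000_agenda.pdf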

    # --- Outlook ---
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)
    print(f"Root folder: {root_folder.Name}")

    uinfo = root_folder.Folders[folder_name]

    # Collect first to a list so we can sort/group
    records = []
    items = uinfo.Items
    # Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
    try:
        items.Sort("[SentOn]", False)  # second arg is Descending; False => ascending
    except Exception:
        pass

    for message in items:
        # Skip non-mail items
        if getattr(message, "Class", None) != 43:  # 43 = olMail (MailItem)
            continue
        subj_raw = getattr(message, "Subject", "") or ""
        subj_norm = normalize_subject(subj_raw)

        # sender fallback chain
        sender = None
        for attr in ("Sender", "SenderName", "SenderEmailAddress"):
            try:
                sender = getattr(message, attr)
                if sender:
                    break
            except Exception:
                pass
        sender = str(sender) if sender else "UNKNOWN"

        sent_on = getattr(message, "SentOn", None)
        sent_iso = iso(sent_on)

        attachments_saved = []
        if save_attachments:
            attachments_saved = save_all_attachments(
                message,
                ATT_DIR / safe_name(subj_norm)
            )

        body = ""
        try:
            body = message.Body or ""
        except Exception:
            try:
                body = message.HTMLBody or ""
            except Exception:
                body = ""

        records.append({
            "subject_norm": subj_norm,
            "subject_raw": subj_raw,
            "sender": sender,
            "sent_on": sent_on,
            "sent_iso": sent_iso,
            "attachments": attachments_saved,
            "body": body,
        })

    # Sort by normalized subject, then by sent time (ascending)
    records.sort(key=lambda r: (r["subject_norm"].lower(), r["sent_on"] or datetime.min))
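    # After this sort, messages sharing a normalized subject sit adjacent and in
    # chronological order, which is exactly what groupby() below relies on.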

    # Write grouped log
    with open(LOG_PATH, "w", encoding="utf-8", newline="") as log:
        for subj, group_iter in groupby(records, key=lambda r: r["subject_norm"]):
            thread = list(group_iter)
            log.write(f"\n\n### {subj} — {len(thread)} message(s)\n")
            for r in thread:
                if r["attachments"]:
                    att_line = "Attachments: " + ", ".join(r["attachments"]) + "\n"
                else:
                    att_line = ""
                log.write(
                    f"\n---\n"
                    f"Subject: {r['subject_raw']}\n"
                    f"From: {r['sender']}\n"
                    f"Date: {r['sent_iso']}\n"
                    f"{att_line}\n"
                    f"{r['body']}\n"
                )

    print(f"Wrote grouped log -> {LOG_PATH.as_posix()}")

    # Insert original emails & attachments into DB (summary linkage added later)
    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_email
    init_usefulinfo_schema()

    # Helpers to extract attachment text
    try:
        import PyPDF2
    except Exception:
        PyPDF2 = None
    try:
        import docx
    except Exception:
        docx = None
    try:
        from pptx import Presentation
    except Exception:
        Presentation = None

    def _extract_pdf_text(p):
        if not PyPDF2:
            return ''
        try:
            with open(p, 'rb') as fh:
                r = PyPDF2.PdfReader(fh)
                txt = []
                for pg in r.pages[:10]:
                    try:
                        txt.append(pg.extract_text() or '')
                    except Exception:
                        pass
                return "\n".join(txt)[:12000]
        except Exception:
            return ''

    def _extract_docx_text(p):
        if not docx:
            return ''
        try:
            d = docx.Document(p)
            return "\n".join([para.text for para in d.paragraphs if para.text])[:12000]
        except Exception:
            return ''

    def _extract_pptx_text(p):
        if not Presentation:
            return ''
        try:
            pres = Presentation(p)
            chunks = []
            for slide in pres.slides:
                for shape in slide.shapes:
                    try:
                        if hasattr(shape, 'has_text_frame') and shape.has_text_frame:
                            for para in shape.text_frame.paragraphs:
                                txt = ''.join(run.text for run in para.runs)
                                if txt:
                                    chunks.append(txt)
                    except Exception:
                        pass
            return "\n".join(chunks)[:12000]
        except Exception:
            return ''

    for rec in records:
        atts = []
        for ap in rec['attachments']:
            ext = os.path.splitext(ap)[1].lower()
            txt = ''
            if ext == '.pdf':
                txt = _extract_pdf_text(ap)
            elif ext == '.docx':
                txt = _extract_docx_text(ap)
            elif ext == '.pptx':
                txt = _extract_pptx_text(ap)
            atts.append({'path': ap, 'text': txt or None})
        try:
            insert_usefulinfo_email(
                subject_raw=rec['subject_raw'],
                subject_norm=rec['subject_norm'],
                sender=rec['sender'],
                sent_iso=rec['sent_iso'],
                body=rec['body'],
                attachments=atts,
                summary_id=None,
            )
        except Exception as e:
            print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))


def process_useful_info(start=0, num=0):
    import re
    from pathlib import Path

    # Optionally import heavy libs only when needed
    try:
        import PyPDF2
    except Exception:
        PyPDF2 = None
    try:
        import docx  # python-docx
    except Exception:
        docx = None
    try:
        from pptx import Presentation
    except Exception:
        Presentation = None

    HEADER_RE = re.compile(r'\r?\n###\s(.*)\r?\n')  # matches the log's group headers, CRLF-safe
    COUNT_RE = re.compile(r'^(?P<subject>.*?)\s+—\s+(?P<count>\d+)\s+message', re.I)

    def parse_groups(text):
        """
        Return a list of groups found in the log file.
        Each group is a dict: {header, subject, count, content}
        """
        groups = []
        matches = list(HEADER_RE.finditer(text))
        if not matches:
            return groups

        for i, m in enumerate(matches):
            header = m.group(1).strip()  # e.g. "Subject X — 3 message(s)" OR just "Subject X"
            start = m.end()
            end = matches[i+1].start() if i + 1 < len(matches) else len(text)
            content = text[start:end]

            # Try to extract subject and count if present
            subject = header
            count = None
            cm = COUNT_RE.search(header)
            if cm:
                subject = cm.group('subject').strip()
                try:
                    count = int(cm.group('count'))
                except Exception:
                    count = None

            groups.append({
                "header": header,
                "subject": subject,
                "count": count,
                "content": content
            })
        return groups
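    # Illustrative example: the log header "### Budget update — 3 message(s)"
    # yields {"header": "Budget update — 3 message(s)", "subject": "Budget update",
    #         "count": 3, "content": <text up to the next "###" header>}.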

    def for_each_group(log_path="cache/email_usefulinfo_sorted.txt", f=lambda idx, g: None, start=1, count=-1):
        """
        Read the grouped log, split into groups, and call f(index, group) on each.
        start: 1-based index to begin processing (useful for resuming).
        count: number of groups to process; -1 means all.
        """
        p = Path(log_path)
        text = p.read_text(encoding="utf-8", errors="replace")

        groups = parse_groups(text)

        done = 0

        if start < 1:
            start = 1
        for idx, g in enumerate(groups, start=1):
            if idx < start:
                continue
            f(idx, g)
            done += 1
            if count != -1 and done >= count:
                return
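    # Usage sketch (hypothetical callback): print the subjects of groups 5-14:
    #   for_each_group(f=lambda idx, g: print(idx, g["subject"]), start=5, count=10)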

    def _parse_attachment_paths(block):
        paths = []
        for line in block.splitlines():
            if line.startswith("Attachments:"):
                # After the colon: comma-separated file paths
                rest = line.split(":", 1)[1].strip()
                if rest:
                    parts = [p.strip() for p in rest.split(",") if p.strip()]
                    paths.extend(parts)
        # Deduplicate, keeping order
        seen = set()
        uniq = []
        for p in paths:
            if p not in seen:
                seen.add(p)
                uniq.append(p)
        return uniq
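    # Illustrative example: the log line
    #   "Attachments: cache/a/20240101_x.pdf, cache/a/20240101_y.docx"
    # yields ["cache/a/20240101_x.pdf", "cache/a/20240101_y.docx"].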

    def _safe_read_textfile(p, max_chars=8000):
        try:
            return p.read_text(encoding="utf-8", errors="replace")[:max_chars]
        except Exception:
            return ""

    def _extract_pdf_text(p, max_pages=10, max_chars=12000):
        if not PyPDF2:
            return ""
        text = []
        try:
            with p.open('rb') as fh:
                reader = PyPDF2.PdfReader(fh)
                pages = min(len(reader.pages), max_pages)
                for i in range(pages):
                    try:
                        text.append(reader.pages[i].extract_text() or "")
                    except Exception:
                        pass
        except Exception:
            return ""
        return "\n".join(text)[:max_chars]

    def _extract_docx_text(p, max_chars=12000):
        if not docx:
            return ""
        try:
            d = docx.Document(str(p))
            paras = [para.text for para in d.paragraphs if para.text]
            return "\n".join(paras)[:max_chars]
        except Exception:
            return ""

    def _extract_pptx_text(p, max_chars=12000):
        if not Presentation:
            return ""
        try:
            pres = Presentation(str(p))
            chunks = []
            for slide in pres.slides:
                for shape in slide.shapes:
                    try:
                        if hasattr(shape, "has_text_frame") and shape.has_text_frame:
                            for para in shape.text_frame.paragraphs:
                                text = "".join(run.text for run in para.runs)
                                if text:
                                    chunks.append(text)
                    except Exception:
                        pass
            return "\n".join(chunks)[:max_chars]
        except Exception:
            return ""

    def _extract_attachment_text(paths):
        out_chunks = []
        for raw in paths:
            p = Path(raw)
            # Ensure relative paths still resolve from the repo root
            if not p.is_absolute():
                p = Path.cwd() / p
            if not p.exists():
                continue
            ext = p.suffix.lower()
            text = ""
            if ext == ".pdf":
                text = _extract_pdf_text(p)
            elif ext == ".docx":
                text = _extract_docx_text(p)
            elif ext == ".pptx":
                text = _extract_pptx_text(p)
            # Fallback: read plain-text formats as utf-8
            if not text and ext in {".txt", ".md", ".csv"}:
                text = _safe_read_textfile(p)
            if text:
                out_chunks.append(f"--- Attachment: {p.name} ---\n{text}")
        return "\n\n".join(out_chunks)

    OUT_JSONL = Path("cache/useful_info_summaries.jsonl")
    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_record
    init_usefulinfo_schema()

    def _loose_parse_json(s):
        # Try a direct parse first
        try:
            return json.loads(s)
        except Exception:
            pass
        # Strip markdown code fences
        try:
            m = re.search(r"```(?:json)?\s*(.*?)```", s, flags=re.S | re.I)
            if m:
                inner = m.group(1)
                try:
                    return json.loads(inner)
                except Exception:
                    s = inner
        except Exception:
            pass
        # Grab from the first { to the last }
        try:
            i = s.find('{')
            j = s.rfind('}')
            if i != -1 and j != -1 and j > i:
                frag = s[i:j+1]
                return json.loads(frag)
        except Exception:
            pass
        return None
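    # Illustrative inputs that all recover {"a": 1}:
    #   _loose_parse_json('{"a": 1}')
    #   _loose_parse_json('```json\n{"a": 1}\n```')
    #   _loose_parse_json('Here you go: {"a": 1} -- hope that helps')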

    def demo_f(idx, g):
        print(f"[{idx}] {g['subject']} (count: {g['count']})")
        content = g['content']
        attach_paths = _parse_attachment_paths(content)
        if attach_paths:
            attach_text = _extract_attachment_text(attach_paths)
            if attach_text:
                content = content + "\n\n[ATTACHMENT_TEXT]\n" + attach_text
        x = summarize_u_info(content)

        # Persist a JSONL record (robust to non-JSON responses)
        record = {
            "index": idx,
            "subject": g.get('subject'),
            "count": g.get('count'),
            "attachments": attach_paths,
        }
        parsed = _loose_parse_json(x)
        if isinstance(parsed, dict) and parsed:
            record["summary"] = parsed
        else:
            record["summary_raw"] = x
        with open(OUT_JSONL, "a", encoding="utf-8") as outf:
            outf.write(json.dumps(record, ensure_ascii=False) + "\n")

        # Also persist to PostgreSQL via localcache2, but only parsed JSON
        if 'summary' in record:
            try:
                sid = insert_usefulinfo_record(record['summary'])
                print('Inserted summary id:', sid)
                # Link original emails to this summary by normalized subject
                try:
                    from localcache2 import link_emails_to_summary
                    link_count = link_emails_to_summary(g['subject'], sid)
                    print('Linked emails to summary:', link_count)
                except Exception as e2:
                    print('[usefulinfo][email-link-failed]', str(e2))
            except Exception as e:
                print('[warn] DB insert failed:', e)
        else:
            print('Skipped insert: could not parse a JSON summary for this group.')

    # Interactive prompts if parameters were not provided
    try:
        if not start:
            inp = input('Start group index (1-based, e.g., 1): ').strip()
            start = int(inp) if inp else 1
        if not num:
            inp = input('How many groups to process (e.g., 10, -1 for all): ').strip()
            num = int(inp) if inp else -1
    except Exception:
        start = start or 1
        num = num or -1

    for_each_group(
        log_path="cache/email_usefulinfo_sorted.txt",
        f=demo_f,
        start=start,
        count=num
    )


def process_email_filesave(message, log, i):
    atch_list = "Attachments: "
    atch_count = 0
    if SAVE_ATTACHMENTS:
        try:
            for attachment in message.Attachments:
                print(" -> " + str(attachment))
                #loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
                loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
                attachment.SaveAsFile(loc)
                atch_list += str(attachment) + ', '
                atch_count += 1
        except Exception:
            pass
    print(" " + message.Subject)
    log.write(f"\n\n---\nSubject: {message.Subject}\nFrom: {message.Sender}\n")
    if atch_count:
        log.write(f"{atch_list}\n")
    log.write(f"Date: {message.SentOn}\n\n{message.Body}\n")

    logeach = codecs.open(f"cache/faq/{i}.txt", "w", "utf-8")
    logeach.write(f"Subject: {message.Subject}\nFrom: {message.Sender}\nDate: {message.SentOn}\n\n{message.Body}")
    logeach.close()


def list_faq():
    import win32com.client
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)
    print("\nFAQ Emails:")
    uinfo = root_folder.Folders['for faq']
    index = 0

    # Start from today and walk backward
    end_date = datetime.now()

    # Go back this many months
    months_back = 60
    chunk_results = []

    print("\nLoading messages in 1-month chunks...\n")

    for i in range(months_back):
        chunk_end = end_date.replace(day=1) - timedelta(days=1)  # end of the previous month
        chunk_start = chunk_end.replace(day=1)                   # start of that month

        # Outlook Restrict expects a 12-hour clock with AM/PM, so use %I rather than %H
        start_str = chunk_start.strftime("%m/%d/%Y %I:%M %p")
        end_str = chunk_end.strftime("%m/%d/%Y %I:%M %p")

        restriction = f"[ReceivedTime] >= '{start_str}' AND [ReceivedTime] <= '{end_str}'"
        filtered = uinfo.Items.Restrict(restriction)
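        # Illustrative restriction string (the time-of-day component comes from
        # whenever the script runs), e.g.:
        #   [ReceivedTime] >= '03/01/2024 02:30 PM' AND [ReceivedTime] <= '03/31/2024 02:30 PM'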

        # Force enumeration
        #messages = [msg for msg in filtered if msg.Class == 43]  # MailItem only
        messages = [msg for msg in filtered]
        count = len(messages)

        print(f"{chunk_start.strftime('%B %Y')}: {count} messages")
        chunk_results.append((chunk_start.strftime('%Y-%m'), count))

        for message in messages:
            try:
                print(f" {index}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
                index += 1
            except Exception as e:
                print(f"Exception: {str(e)}")

        end_date = chunk_start  # Move back to the previous month


def fetch_faq():
    import win32com.client
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)

    PAUSE = 1

    startat = input("Press enter to continue or a number to start at that message: ")
    if startat == '':
        startat = '0'
    startat = int(startat)
    i = 0

    log = codecs.open("cache/email_gpt_faq.txt", "w", "utf-8")
    log2 = codecs.open("cache/faq.json", "a", "utf-8")

    # access a subfolder
    print("\nFAQ Emails:")
    uinfo = root_folder.Folders['for faq']
    for message in uinfo.Items:
        if i < startat:
            i += 1
            continue

        try:
            process_email_filesave(message, log, i)
        except Exception as e:
            print(f"Exception: {str(e)}")

        summary = gpt_faq(f"Subject: {message.Subject}\nBody: {message.Body}")
        log2.write(f",\n{summary}")
        log2.flush()
        print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")

        i += 1
        if PAUSE:
            temp = input(f"press enter to continue, or q to quit now at message {i}: ")
            if temp == 'q':
                exit()


if __name__ == "__main__":

    print('')
    options = {
        1: ['gpt test', gpt_test],
        2: ['test email send', sample_send_email],
        3: ['fetch "useful info" mailbox', fetch_useful_info],
        4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
        5: ['list faq mailbox', list_faq],
        6: ['process useful info msgs', process_useful_info],
        7: ['export useful info events to .ics', lambda: (__import__('localcache2').export_usefulinfo_events_to_ics() or True)],
    }
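    # Usage sketch: pass the option number on the command line to skip the menu,
    # e.g.  python this_script.py 3   (the script filename here is hypothetical)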

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the chosen function from the options dict
    options[int(resp)][1]()