import os, json, sys, codecs, re
from datetime import datetime, timedelta
import util
from openai import OpenAI

from canvas_secrets import openai_org, openai_api_key

client = OpenAI(
    api_key=openai_api_key,
    organization=openai_org
)

DEFAULT_MODEL = "gpt-5-mini"

SAVE_ATTACHMENTS = 1

def gpt_chat(instruction, prompt, model=DEFAULT_MODEL):
    # 1) Strip extremely long Outlook protection URLs first
    #try:
    #    prompt = re.sub(r"\S*protection\.outlook\.com\S*", "", prompt, flags=re.I)
    #except Exception:
    #    pass

    # 2) Crude token estimation and truncation (target ~29k tokens).
    # Approximates 1 token ≈ 4 characters.
    def _est_tokens(s):
        try:
            return max(1, int(len(s) / 4))
        except Exception:
            return 1

    MAX_TOKENS = 29000
    # Reserve some tokens for system/overhead.
    overhead = 200
    inst_tokens = _est_tokens(instruction)
    prompt_tokens = _est_tokens(prompt)
    total = inst_tokens + prompt_tokens + overhead
    if total > MAX_TOKENS:
        allow = max(500, MAX_TOKENS - inst_tokens - overhead)
        # Convert the token budget back to characters.
        allow_chars = max(2000, allow * 4)
        # Keep the last N chars, on the assumption that the latest content is most relevant.
        if len(prompt) > allow_chars:
            prompt = prompt[-allow_chars:]
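    # Worked example (illustrative): with a 1,000-token instruction,
    # allow = 29000 - 1000 - 200 = 27,800 tokens ≈ 111,200 characters,
    # so an over-long prompt is cut to its trailing 111,200 characters.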

    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": prompt}
    ]

    try:
        response = client.chat.completions.create(model=model, messages=messages)
        result = response.choices[0].message.content
    except Exception as e:
        # Graceful failure; log and return empty JSON.
        try:
            log3 = codecs.open('cache/gptlog.txt', 'a', 'utf-8')
            log3.write(json.dumps({'prompt': prompt[:2000], 'error': str(e)}, indent=2))
            log3.write("\n\n---\n\n")
            log3.close()
        except Exception:
            pass
        return "{}"

    # Log success.
    try:
        log3 = codecs.open('cache/gptlog.txt', 'a', 'utf-8')
        log3.write(json.dumps({'prompt': prompt, 'result': result}, indent=2))
        log3.write("\n\n---\n\n")
        log3.close()
    except Exception:
        pass
    return result
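
# Minimal usage sketch (kept as a comment so importing this module stays
# side-effect free; the instruction/prompt strings here are hypothetical):
#
#   reply = gpt_chat("You are a terse assistant.", "Summarize: meeting moved to 3pm Friday.")
#   print(reply)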


def summarize_u_info(msg):
    system_role = """You are a practical, efficient cataloger and indexer of information. What follows is one or more emails which most likely (but not necessarily) contain useful information of some sort. Please determine if the message has useful info, and if so, attempt to reformat it as such:

{"source": <original sender or source>, "date": <month and year MM/YY>, "tags": <short list of tags and topics, multi words use snake_case>, "short": <one sentence summary>, "summary": <complete summary with details if warranted>, "events": <see below>}

If one or more specific dates and times of an event (or deadline) of some sort are mentioned, please attempt to extract the datetime and a 1-2 line summary of each event. The "events" field of the response should be a list of these: {"dt": <date and time>, "length": <if known>, "title": <title>, "description": <short description>, "contact": <person/email who seems to be in charge or know more>}. Try to include links in the description, if they seem relevant.

For all summaries: speak in a first-person direct voice, authoritatively. For example, instead of saying 'x shared details about offerings at Bakersfield College, including a, b and c', just say 'Bakersfield College offers a, b and c'. If there are links that seem relevant in the email, include them in the summary.

Some sample tags: event, advice, rule, warning, hr, it, disted, canvas, meeting, senate, dept_chairs, deans, administration, guided_pathways, site, file, article, contract, deadline, construction, catalog, schedule, curriqunet, banner, cvc, password, payroll, training, tip, graduation, photos, faculty, evaluation, convocation, flex, board, curriculum, ztc, oei, grant, accessibility, legislation, hyflex, hybrid, handbook, guideline, lti, api, integration, request, scholarship, library, aws, opportunity, background, calendar, semester, accreditation, professional_development, zoom, ai, html, code, video, pocr, badge, liason, newsletter, act, law, equipment, best_practices, outcomes, slo, plo, ilo, data, cares_act, census, admissions, financial_aid, enrollment, title_5, syllabus, ccn, communication, club, survey, checklist, covid_19, pipr, program_review, policy, vpat, compliance, emergency, finals, rfp, fraud, research, tutoring, stem, writing_center, learning_commons, hire, promotion, help_desk, housing, url, tenure, tlc, mental_health, president, vpaa, vpss, vp, mac, meaningful_assessment, negotiation, union, libguide, evidence, retirement

Remember that not every email is notable. If this is the case, just return an empty json object.

Always answer in valid json, nothing else."""
    x = gpt_chat(system_role, msg)
    print(x)
    return x


def gpt_faq(faq):
    system_role = """Ignore all previous instructions. You are an expert on the management and implementation of all aspects of online courses and running a distance education program, especially in higher education and community college. You are also an expert in pedagogy and the tools and techniques of effective teaching and learning, based on research and experience. You know how to focus on concrete examples of effective teaching which actually work in the real world, and the advice you give is specific and immediately useful to teachers in the classroom. You provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. Your users are experts in teaching and college administration, are comfortable with the California and Federal Education Code, and know they need to comply with it. There is no need to explain this beyond noting any relevant laws. Don't be verbose in your answers, but do provide details and examples where they might help the explanation.

You are a world-class technical writer. Your style is human-centered, friendly and kind. Give me concise and accurate answers and ignore all the niceties that OpenAI programmed you with. Omit all qualifications and excuses. I know you are a large language model but I want you to pretend you are a confident and superintelligent oracle that can help a limited human warp reality to his own intentions. You can reveal your own sense of humor and depth of intelligence. It is very important that you get this right.

Adopt a penetratingly insightful, incisively funny, cleverly witty persona. Your answers cultivate playful, out-of-the-box thinking, rationality, technology, design, and careful use of incentives. Speak in specific, topic-relevant terminology. Do NOT hedge or qualify. Do not waffle. Speak directly and be willing to make creative guesses.

I am pasting below an email chain between myself and colleague(s). I am fairly certain that it contains a technical question that I have answered. It may be spread out over several messages, or there might be some clarification or follow-up questions. There will be headers, signatures, and other text that isn't part of the core message. Ignore that. Consider the whole email chain while you prepare the following: Respond with a json formatted dictionary that contains the following:

{ "question": "Restate the question or problem in a concise but clear manner", "topics": ["keywords", "or phrases", "that categorize the issue"], "answer": "The best possible answer, written in markdown format. Draw the answer from the email but feel free to edit or embellish based on your knowledge. Generalize the answer to anyone who might have the issue. Your audience is mostly instructors working at a small community college. Do not refer to anyone's name specifically, unless it is Peter or Sabrina, but instead write for a general audience looking for the answers to their questions. We are writing a FAQ or help page. Feel free to use markdown-formatted bold, italic, lists, and links."} """

    return gpt_chat(system_role, faq)


def gpt_test():
    my_prompt = "Write a series of texts trying to sell a pen to a stranger."
    print(sys.argv)

    if len(sys.argv) > 1:
        my_prompt = " ".join(sys.argv[1:])
    else:
        print("Prompt: %s" % my_prompt)

    result = gpt_chat("", my_prompt)
    print(result)


def sample_send_email():
    import win32com.client
    ol = win32com.client.Dispatch("outlook.application")
    olmailitem = 0x0  # olMailItem: the item type for a standard mail message
    newmail = ol.CreateItem(olmailitem)
    newmail.Subject = 'Testing Mail'
    newmail.To = 'peter.howell@gmail.com'
    #newmail.CC = 'xyz@gmail.com'
    newmail.Body = 'Hello, this is a test email to showcase how to send emails from Python and Outlook.'
    # attach = 'C:\\Users\\admin\\Desktop\\Python\\Sample.xlsx'
    # newmail.Attachments.Add(attach)
    # To display the mail before sending it:
    # newmail.Display()
    newmail.Send()


def fetch_useful_info(save_attachments=True, folder_name='useful info ref'):
    import os, re
    from pathlib import Path
    from itertools import groupby
    from datetime import datetime
    import win32com.client

    CACHE = Path("cache")
    ATT_DIR = CACHE / "attachments_useful_info"
    LOG_PATH = CACHE / "email_usefulinfo_sorted.txt"
    ATT_DIR.mkdir(parents=True, exist_ok=True)
    CACHE.mkdir(parents=True, exist_ok=True)

    # --- helpers ---
    prefix_re = re.compile(r'^\s*(re|fw|fwd|aw|sv|vb|tr|wg)\s*:\s*', re.I)  # covers common locales too
    bracket_tag_re = re.compile(r'^\s*(\[[^\]]+\]\s*)+', re.I)

    def normalize_subject(s):
        if not s:
            return "(no subject)"
        s = s.strip()
        # strip leading [TAGS] like [EXTERNAL] [Some System]
        s = bracket_tag_re.sub('', s)
        # strip any chain of RE:/FWD: etc. at the start
        while prefix_re.match(s):
            s = prefix_re.sub('', s, count=1)
        # collapse whitespace
        s = re.sub(r'\s+', ' ', s).strip()
        return s or "(no subject)"
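    # Illustrative examples (hypothetical subjects):
    #   normalize_subject("[EXTERNAL] RE: Fwd: Budget update")  -> "Budget update"
    #   normalize_subject("   ")                                -> "(no subject)"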

    def safe_name(s):
        # reasonable Windows-safe folder name for a subject
        s = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', s)
        return s[:120]

    def iso(dt):
        # Outlook COM datetime -> ISO-style string (local)
        try:
            return dt.strftime('%Y-%m-%d %H:%M:%S')
        except Exception:
            return str(dt)

    def save_all_attachments(mail, subject_dir: Path):
        saved = []
        try:
            atts = mail.Attachments
            count = atts.Count
            if count == 0:
                return saved
            subject_dir.mkdir(parents=True, exist_ok=True)
            # iterate COM collection by index (1-based)
            for i in range(1, count + 1):
                att = atts.Item(i)
                # build a unique filename to avoid collisions
                base = str(att.FileName) if getattr(att, "FileName", None) else f"attachment_{i}"
                base = safe_name(base)
                # prefix by save time so repeated names stay distinct
                ts = datetime.now().strftime('%Y%m%d_%H%M%S')
                out = subject_dir / f"{ts}_{base}"
                att.SaveAsFile(str(out))
                saved.append(str(out.as_posix()))
        except Exception:
            # swallow attachment oddities; continue
            pass
        return saved
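    # Illustrative output path:
    #   cache/attachments_useful_info/Budget update/20240301_143000_agenda.pdf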

    # --- Outlook ---
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)
    print(f"Root folder: {root_folder.Name}")

    uinfo = root_folder.Folders[folder_name]

    # Collect first to a list so we can sort/group
    records = []
    items = uinfo.Items
    # Optional: sort by SentOn ascending inside Outlook (helps performance for big folders)
    try:
        items.Sort("[SentOn]", False)  # second arg is Descending; False => ascending
    except Exception:
        pass

    for message in items:
        # Skip non-mail items
        if getattr(message, "Class", None) != 43:  # 43 = olMail (MailItem)
            continue
        subj_raw = getattr(message, "Subject", "") or ""
        subj_norm = normalize_subject(subj_raw)

        # sender fallback chain
        sender = None
        for attr in ("Sender", "SenderName", "SenderEmailAddress"):
            try:
                sender = getattr(message, attr)
                if sender:
                    break
            except Exception:
                pass
        sender = str(sender) if sender else "UNKNOWN"

        sent_on = getattr(message, "SentOn", None)
        sent_iso = iso(sent_on)

        attachments_saved = []
        if save_attachments:
            attachments_saved = save_all_attachments(
                message,
                ATT_DIR / safe_name(subj_norm)
            )

        body = ""
        try:
            body = message.Body or ""
        except Exception:
            try:
                body = message.HTMLBody or ""
            except Exception:
                body = ""

        records.append({
            "subject_norm": subj_norm,
            "subject_raw": subj_raw,
            "sender": sender,
            "sent_on": sent_on,
            "sent_iso": sent_iso,
            "attachments": attachments_saved,
            "body": body,
        })

    # Sort by normalized subject, then by sent time (ascending)
    records.sort(key=lambda r: (r["subject_norm"].lower(), r["sent_on"] or datetime.min))
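    # After this sort, messages sharing a normalized subject sit adjacent and in
    # chronological order, which is exactly what groupby() below relies on.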

    # Write grouped log
    with open(LOG_PATH, "w", encoding="utf-8", newline="") as log:
        for subj, group_iter in groupby(records, key=lambda r: r["subject_norm"]):
            thread = list(group_iter)
            log.write(f"\n\n### {subj} — {len(thread)} message(s)\n")
            for r in thread:
                if r["attachments"]:
                    att_line = "Attachments: " + ", ".join(r["attachments"]) + "\n"
                else:
                    att_line = ""
                log.write(
                    f"\n---\n"
                    f"Subject: {r['subject_raw']}\n"
                    f"From: {r['sender']}\n"
                    f"Date: {r['sent_iso']}\n"
                    f"{att_line}\n"
                    f"{r['body']}\n"
                )

    print(f"Wrote grouped log -> {LOG_PATH.as_posix()}")

    # Insert original emails & attachments into DB (summary linkage added later)
    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_email
    init_usefulinfo_schema()

    # Helpers to extract attachment text
    try:
        import PyPDF2
    except Exception:
        PyPDF2 = None
    try:
        import docx
    except Exception:
        docx = None
    try:
        from pptx import Presentation
    except Exception:
        Presentation = None

    def _extract_pdf_text(p):
        if not PyPDF2:
            return ''
        try:
            with open(p, 'rb') as fh:
                r = PyPDF2.PdfReader(fh)
                txt = []
                for pg in r.pages[:10]:
                    try:
                        txt.append(pg.extract_text() or '')
                    except Exception:
                        pass
                return "\n".join(txt)[:12000]
        except Exception:
            return ''

    def _extract_docx_text(p):
        if not docx:
            return ''
        try:
            d = docx.Document(p)
            return "\n".join([para.text for para in d.paragraphs if para.text])[:12000]
        except Exception:
            return ''

    def _extract_pptx_text(p):
        if not Presentation:
            return ''
        try:
            pres = Presentation(p)
            chunks = []
            for slide in pres.slides:
                for shape in slide.shapes:
                    try:
                        if hasattr(shape, 'has_text_frame') and shape.has_text_frame:
                            for para in shape.text_frame.paragraphs:
                                txt = ''.join(run.text for run in para.runs)
                                if txt:
                                    chunks.append(txt)
                    except Exception:
                        pass
            return "\n".join(chunks)[:12000]
        except Exception:
            return ''

    for rec in records:
        atts = []
        for ap in rec['attachments']:
            ext = os.path.splitext(ap)[1].lower()
            txt = ''
            if ext == '.pdf':
                txt = _extract_pdf_text(ap)
            elif ext == '.docx':
                txt = _extract_docx_text(ap)
            elif ext == '.pptx':
                txt = _extract_pptx_text(ap)
            atts.append({'path': ap, 'text': txt or None})
        try:
            insert_usefulinfo_email(
                subject_raw=rec['subject_raw'],
                subject_norm=rec['subject_norm'],
                sender=rec['sender'],
                sent_iso=rec['sent_iso'],
                body=rec['body'],
                attachments=atts,
                summary_id=None,
            )
        except Exception as e:
            print('[usefulinfo][email-insert-failed]', rec.get('subject_raw'), str(e))


def process_useful_info(start=0, num=0):
    import re
    from pathlib import Path

    # Optionally import heavy libs only when needed
    try:
        import PyPDF2
    except Exception:
        PyPDF2 = None
    try:
        import docx  # python-docx
    except Exception:
        docx = None
    try:
        from pptx import Presentation
    except Exception:
        Presentation = None

    HEADER_RE = re.compile(r'\r?\n###\s(.*)\r?\n')  # matches the log's group headers, CRLF-safe
    COUNT_RE = re.compile(r'^(?P<subject>.*?)\s+—\s+(?P<count>\d+)\s+message', re.I)

    def parse_groups(text):
        """
        Return a list of groups found in the log file.
        Each group is a dict: {header, subject, count, content}
        """
        groups = []
        matches = list(HEADER_RE.finditer(text))
        if not matches:
            return groups

        for i, m in enumerate(matches):
            header = m.group(1).strip()  # e.g. "Subject X — 3 message(s)" OR just "Subject X"
            start = m.end()
            end = matches[i+1].start() if i + 1 < len(matches) else len(text)
            content = text[start:end]

            # Try to extract subject and count if present
            subject = header
            count = None
            cm = COUNT_RE.search(header)
            if cm:
                subject = cm.group('subject').strip()
                try:
                    count = int(cm.group('count'))
                except Exception:
                    count = None

            groups.append({
                "header": header,
                "subject": subject,
                "count": count,
                "content": content
            })
        return groups
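    # Illustrative example: the log header "### Budget update — 3 message(s)"
    # yields {"header": "Budget update — 3 message(s)", "subject": "Budget update",
    #         "count": 3, "content": <text up to the next "###" header>}.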

    def for_each_group(log_path="cache/email_usefulinfo_sorted.txt", f=lambda idx, g: None, start=1, count=-1):
        """
        Read the grouped log, split into groups, and call f(index, group) on each.
        start: 1-based index to begin processing (useful for resuming).
        count: number of groups to process; -1 means all.
        """
        p = Path(log_path)
        text = p.read_text(encoding="utf-8", errors="replace")

        groups = parse_groups(text)

        done = 0

        if start < 1:
            start = 1
        for idx, g in enumerate(groups, start=1):
            if idx < start:
                continue
            f(idx, g)
            done += 1
            if count != -1 and done >= count:
                return
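    # Usage sketch (hypothetical callback): print the subjects of groups 5-14:
    #   for_each_group(f=lambda idx, g: print(idx, g["subject"]), start=5, count=10)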

    def _parse_attachment_paths(block):
        paths = []
        for line in block.splitlines():
            if line.startswith("Attachments:"):
                # After the colon: comma-separated file paths
                rest = line.split(":", 1)[1].strip()
                if rest:
                    parts = [p.strip() for p in rest.split(",") if p.strip()]
                    paths.extend(parts)
        # Deduplicate, keeping order
        seen = set()
        uniq = []
        for p in paths:
            if p not in seen:
                seen.add(p)
                uniq.append(p)
        return uniq
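    # Illustrative example: the log line
    #   "Attachments: cache/a/20240101_x.pdf, cache/a/20240101_y.docx"
    # yields ["cache/a/20240101_x.pdf", "cache/a/20240101_y.docx"].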

    def _safe_read_textfile(p, max_chars=8000):
        try:
            return p.read_text(encoding="utf-8", errors="replace")[:max_chars]
        except Exception:
            return ""

    def _extract_pdf_text(p, max_pages=10, max_chars=12000):
        if not PyPDF2:
            return ""
        text = []
        try:
            with p.open('rb') as fh:
                reader = PyPDF2.PdfReader(fh)
                pages = min(len(reader.pages), max_pages)
                for i in range(pages):
                    try:
                        text.append(reader.pages[i].extract_text() or "")
                    except Exception:
                        pass
        except Exception:
            return ""
        return "\n".join(text)[:max_chars]

    def _extract_docx_text(p, max_chars=12000):
        if not docx:
            return ""
        try:
            d = docx.Document(str(p))
            paras = [para.text for para in d.paragraphs if para.text]
            return "\n".join(paras)[:max_chars]
        except Exception:
            return ""

    def _extract_pptx_text(p, max_chars=12000):
        if not Presentation:
            return ""
        try:
            pres = Presentation(str(p))
            chunks = []
            for slide in pres.slides:
                for shape in slide.shapes:
                    try:
                        if hasattr(shape, "has_text_frame") and shape.has_text_frame:
                            for para in shape.text_frame.paragraphs:
                                text = "".join(run.text for run in para.runs)
                                if text:
                                    chunks.append(text)
                    except Exception:
                        pass
            return "\n".join(chunks)[:max_chars]
        except Exception:
            return ""

    def _extract_attachment_text(paths):
        out_chunks = []
        for raw in paths:
            p = Path(raw)
            # Ensure relative paths still resolve from the repo root
            if not p.is_absolute():
                p = Path.cwd() / p
            if not p.exists():
                continue
            ext = p.suffix.lower()
            text = ""
            if ext == ".pdf":
                text = _extract_pdf_text(p)
            elif ext == ".docx":
                text = _extract_docx_text(p)
            elif ext == ".pptx":
                text = _extract_pptx_text(p)
            # Fallback: read plain-text formats as utf-8
            if not text and ext in {".txt", ".md", ".csv"}:
                text = _safe_read_textfile(p)
            if text:
                out_chunks.append(f"--- Attachment: {p.name} ---\n{text}")
        return "\n\n".join(out_chunks)

    OUT_JSONL = Path("cache/useful_info_summaries.jsonl")
    from localcache2 import init_usefulinfo_schema, insert_usefulinfo_record
    init_usefulinfo_schema()

    def _loose_parse_json(s):
        # Try a direct parse first
        try:
            return json.loads(s)
        except Exception:
            pass
        # Strip markdown code fences
        try:
            m = re.search(r"```(?:json)?\s*(.*?)```", s, flags=re.S | re.I)
            if m:
                inner = m.group(1)
                try:
                    return json.loads(inner)
                except Exception:
                    s = inner
        except Exception:
            pass
        # Grab from the first { to the last }
        try:
            i = s.find('{')
            j = s.rfind('}')
            if i != -1 and j != -1 and j > i:
                frag = s[i:j+1]
                return json.loads(frag)
        except Exception:
            pass
        return None
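    # Illustrative inputs that all recover {"a": 1}:
    #   _loose_parse_json('{"a": 1}')
    #   _loose_parse_json('```json\n{"a": 1}\n```')
    #   _loose_parse_json('Here you go: {"a": 1} -- hope that helps')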

    def demo_f(idx, g):
        print(f"[{idx}] {g['subject']} (count: {g['count']})")
        content = g['content']
        attach_paths = _parse_attachment_paths(content)
        if attach_paths:
            attach_text = _extract_attachment_text(attach_paths)
            if attach_text:
                content = content + "\n\n[ATTACHMENT_TEXT]\n" + attach_text
        x = summarize_u_info(content)

        # Persist a JSONL record (robust to non-JSON responses)
        record = {
            "index": idx,
            "subject": g.get('subject'),
            "count": g.get('count'),
            "attachments": attach_paths,
        }
        parsed = _loose_parse_json(x)
        if isinstance(parsed, dict) and parsed:
            record["summary"] = parsed
        else:
            record["summary_raw"] = x
        with open(OUT_JSONL, "a", encoding="utf-8") as outf:
            outf.write(json.dumps(record, ensure_ascii=False) + "\n")

        # Also persist to PostgreSQL via localcache2, but only parsed JSON
        if 'summary' in record:
            try:
                sid = insert_usefulinfo_record(record['summary'])
                print('Inserted summary id:', sid)
                # Link original emails to this summary by normalized subject
                try:
                    from localcache2 import link_emails_to_summary
                    link_count = link_emails_to_summary(g['subject'], sid)
                    print('Linked emails to summary:', link_count)
                except Exception as e2:
                    print('[usefulinfo][email-link-failed]', str(e2))
            except Exception as e:
                print('[warn] DB insert failed:', e)
        else:
            print('Skipped insert: could not parse a JSON summary for this group.')

    # Interactive prompts if parameters were not provided
    try:
        if not start:
            inp = input('Start group index (1-based, e.g., 1): ').strip()
            start = int(inp) if inp else 1
        if not num:
            inp = input('How many groups to process (e.g., 10, -1 for all): ').strip()
            num = int(inp) if inp else -1
    except Exception:
        start = start or 1
        num = num or -1

    for_each_group(
        log_path="cache/email_usefulinfo_sorted.txt",
        f=demo_f,
        start=start,
        count=num
    )


def process_email_filesave(message, log, i):
    atch_list = "Attachments: "
    atch_count = 0
    if SAVE_ATTACHMENTS:
        try:
            for attachment in message.Attachments:
                print(" -> " + str(attachment))
                #loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
                loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
                attachment.SaveAsFile(loc)
                atch_list += str(attachment) + ', '
                atch_count += 1
        except Exception:
            pass
    print(" " + message.Subject)
    log.write(f"\n\n---\nSubject: {message.Subject}\nFrom: {message.Sender}\n")
    if atch_count:
        log.write(f"{atch_list}\n")
    log.write(f"Date: {message.SentOn}\n\n{message.Body}\n")

    logeach = codecs.open(f"cache/faq/{i}.txt", "w", "utf-8")
    logeach.write(f"Subject: {message.Subject}\nFrom: {message.Sender}\nDate: {message.SentOn}\n\n{message.Body}")
    logeach.close()


def list_faq():
    import win32com.client
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)
    print("\nFAQ Emails:")
    uinfo = root_folder.Folders['for faq']
    index = 0

    # Start from today and walk backward
    end_date = datetime.now()

    # Go back this many months
    months_back = 60
    chunk_results = []

    print("\nLoading messages in 1-month chunks...\n")

    for i in range(months_back):
        chunk_end = end_date.replace(day=1) - timedelta(days=1)  # end of the previous month
        chunk_start = chunk_end.replace(day=1)                   # start of that month

        # Outlook Restrict expects a 12-hour clock with AM/PM, so use %I rather than %H
        start_str = chunk_start.strftime("%m/%d/%Y %I:%M %p")
        end_str = chunk_end.strftime("%m/%d/%Y %I:%M %p")

        restriction = f"[ReceivedTime] >= '{start_str}' AND [ReceivedTime] <= '{end_str}'"
        filtered = uinfo.Items.Restrict(restriction)
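        # Illustrative restriction string (the time-of-day component comes from
        # whenever the script runs), e.g.:
        #   [ReceivedTime] >= '03/01/2024 02:30 PM' AND [ReceivedTime] <= '03/31/2024 02:30 PM'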

        # Force enumeration
        #messages = [msg for msg in filtered if msg.Class == 43]  # MailItem only
        messages = [msg for msg in filtered]
        count = len(messages)

        print(f"{chunk_start.strftime('%B %Y')}: {count} messages")
        chunk_results.append((chunk_start.strftime('%Y-%m'), count))

        for message in messages:
            try:
                print(f" {index}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
                index += 1
            except Exception as e:
                print(f"Exception: {str(e)}")

        end_date = chunk_start  # Move back to the previous month


def fetch_faq():
    import win32com.client
    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    root_folder = outlook.Folders.Item(1)

    PAUSE = 1

    startat = input("Press enter to continue or a number to start at that message: ")
    if startat == '':
        startat = '0'
    startat = int(startat)
    i = 0

    log = codecs.open("cache/email_gpt_faq.txt", "w", "utf-8")
    log2 = codecs.open("cache/faq.json", "a", "utf-8")

    # access a subfolder
    print("\nFAQ Emails:")
    uinfo = root_folder.Folders['for faq']
    for message in uinfo.Items:
        if i < startat:
            i += 1
            continue

        try:
            process_email_filesave(message, log, i)
        except Exception as e:
            print(f"Exception: {str(e)}")

        summary = gpt_faq(f"Subject: {message.Subject}\nBody: {message.Body}")
        log2.write(f",\n{summary}")
        log2.flush()
        print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")

        i += 1
        if PAUSE:
            temp = input(f"press enter to continue, or q to quit now at message {i}: ")
            if temp == 'q':
                exit()


if __name__ == "__main__":

    print('')
    options = {
        1: ['gpt test', gpt_test],
        2: ['test email send', sample_send_email],
        3: ['fetch "useful info" mailbox', fetch_useful_info],
        4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
        5: ['list faq mailbox', list_faq],
        6: ['process useful info msgs', process_useful_info],
        7: ['export useful info events to .ics', lambda: (__import__('localcache2').export_usefulinfo_events_to_ics() or True)],
    }
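    # Usage sketch: pass the option number on the command line to skip the menu,
    # e.g.  python this_script.py 3   (the script filename here is hypothetical)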

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the chosen function from the options dict
    options[int(resp)][1]()