spring 25 updates

This commit is contained in:
Peter Howell 2025-05-09 09:39:32 -07:00
parent 9bfab84aae
commit 68e48ec6ac
22 changed files with 4353 additions and 2298 deletions

calendarget.py (new file, 158 lines)

@ -0,0 +1,158 @@
from ast import FormattedValue
from googleapiclient.discovery import build
import datetime
from datetime import timedelta
from zoneinfo import ZoneInfo
import win32com.client
from canvas_secrets import GOOGLE_API_KEY
# Replace these with your own API key and Calendar ID.
calendars = {'peter_main':'peter.howell@gmail.com',
'aly_and_peter':'5qgh1nv9g5on3am4vres9i451c@group.calendar.google.com',
'tlc':'4aq36obt0q5jjr5p82p244qs7c@group.calendar.google.com',
'birthdays':'4q73r3ern2k9k83t0orq6iqaac@group.calendar.google.com'}
def to_my_timezone(d, md_table_format=0):
# Parse the datetime string into a timezone-aware datetime.
dt = datetime.datetime.fromisoformat(d)
# Convert to Pacific Time.
dt_pacific = dt.astimezone(ZoneInfo("America/Los_Angeles"))
# Format the datetime. Note:
# - %A: full weekday name (e.g., Thursday)
# - %B: full month name (e.g., April)
# - %d: day of the month (with leading zero, so we'll remove it later)
# - %I: hour in 12-hour format (with leading zero)
# - %M: minute (with leading zero)
# - %p: AM/PM indicator (will be in uppercase)
formatted = dt_pacific.strftime("%A, %B %d | %I:%M%p")
# Remove a leading zero from the day and hour if present
formatted = formatted.replace(" 0", " ")
# Convert the AM/PM indicator to lowercase
formatted = formatted.replace("AM", "am").replace("PM", "pm")
return formatted
#return dt_pacific.strftime("%Y-%m-%d %H:%M:%S %Z%z")
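# Illustrative example (hypothetical input): to_my_timezone("2025-05-09T16:30:00+00:00")
# comes back as "Friday, May 9 | 9:30am" after conversion to Pacific time.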
def in_my_timezone(d, md_table_format=0):
# Parse the datetime string into a timezone-aware datetime.
dt = datetime.datetime.fromisoformat(d)
# Convert to Pacific Time.
#dt_pacific = dt.astimezone(ZoneInfo("America/Los_Angeles"))
# Format the datetime. Note:
# - %A: full weekday name (e.g., Thursday)
# - %B: full month name (e.g., April)
# - %d: day of the month (with leading zero, so we'll remove it later)
# - %I: hour in 12-hour format (with leading zero)
# - %M: minute (with leading zero)
# - %p: AM/PM indicator (will be in uppercase)
formatted = dt.strftime("%A, %B %d | %I:%M%p")
# Remove a leading zero from the day and hour if present
formatted = formatted.replace(" 0", " ")
# Convert the AM/PM indicator to lowercase
formatted = formatted.replace("AM", "am").replace("PM", "pm")
return formatted
def gcal():
# Build the service using the API key.
service = build('calendar', 'v3', developerKey=GOOGLE_API_KEY)
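# Note: an API key alone can generally only read calendars that are shared publicly;
# private calendars would need OAuth credentials instead.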
n = 30
for name,id in calendars.items():
# Get the current time in RFC3339 format (UTC).
now = datetime.datetime.utcnow().isoformat() + 'Z'
print(f'Getting the upcoming {n} events')
events_result = service.events().list(
calendarId=id,
timeMin=now,
maxResults=n,
singleEvents=True,
orderBy='startTime'
).execute()
events = events_result.get('items', [])
if not events:
print(f'No upcoming events found for {name}.')
continue  # move on to the next calendar rather than stopping
print(f"| Date | Time | Event | Lead |")
print(f"|------|------|-------|------|")
for event in events:
# Depending on the event, the start time might be a date or dateTime.
start = event['start'].get('dateTime', event['start'].get('date'))
print(f"| {to_my_timezone(start,1)} | {event.get('summary', 'No Title')} | | |")
def ocal():
# Initialize Outlook COM object.
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
#outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
#print(outlook)
#print(dir(outlook))
#print(dir(outlook.Folders))
root_folder = outlook.Folders.Item(1)
print (f"Root folder: {root_folder.Name}")
#And to know the names of the subfolders you have:
#print("\nFolders:")
#for folder in root_folder.Folders:
# print (" " + folder.Name)
# Get the default calendar folder.
calendar_folder = outlook.GetDefaultFolder(9) # 9 refers to the Calendar folder
#print(calendar_folder)
#print(dir(calendar_folder))
#print(calendar_folder.Items)
items = calendar_folder.Items
print("Total items in Calendar:", items.Count)
# Define the time window for which to fetch events.
n = 14
now = datetime.datetime.now()
end = now + timedelta(days=n) # next n days
# Restrict the calendar items to the time window.
# The Outlook filter syntax uses dates in "mm/dd/yyyy hh:mm" format.
filter_start = now.strftime("%m/%d/%Y %H:%M")
filter_end = end.strftime("%m/%d/%Y %H:%M")
restriction = f"[Start] >= '{filter_start}' AND [End] <= '{filter_end}'"
calendar_items = calendar_folder.Items
calendar_items.IncludeRecurrences = True
calendar_items.Sort("[Start]")
#print(calendar_items)
print(f"Calendar items in next {n} days:")
restricted_items = calendar_items.Restrict(restriction)
for item in restricted_items:
#for item in calendar_items:
start_dt = item.Start # a COM datetime object
start = in_my_timezone(str(start_dt),1)
subject = item.Subject
print(f"{start} - {subject}")
if __name__ == '__main__':
ocal()

File diff suppressed because it is too large.


@ -1,14 +1,15 @@
from ast import Try
#from ast import Try, TryStar
import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd
from datetime import datetime
import pytz
from dateutil import parser
#from dateutil import parser
from datetime import datetime
#from symbol import try_stmt
from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name
from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
from pipelines import sems
from pipelines import fetch, fetch_stream, fetch_collapse, header, url
from schedules import get_semester_schedule
#from pipelines import sems
from localcache import course_quick_stats, get_courses_in_term_local, course_student_stats, all_sem_courses_teachers, full_reload
from localcache2 import db, users_new_this_semester, users_new_this_2x_semester, course_from_id, user_ids_in_shell
from collections import defaultdict
@ -399,7 +400,8 @@ def course_term_summary_local(term="180",term_label="FA23"):
from localcache2 import student_count, teacher_list, course_from_id, course_sched_entry_from_id
# Relevant stuff; trying to see if it's even being used or not
def course_term_summary(term="287",term_label="SP25"):
# relies on schedule being in database
def course_term_summary(term="289",term_label="FA25"):
print("Summary of %s" % term_label)
get_fresh = 1
courses = getCoursesInTerm(term, get_fresh, 0)
@ -490,7 +492,7 @@ GROUP BY c.code ORDER BY c.state, c.code""" % (S['id'],S['id'])
outp2.write("\n\n---------\nNOT PUBLISHED\n\n" + json.dumps(notpub, indent=2))
# Fetch all courses in a given term
def getCoursesInTerm(term=0,get_fresh=1,show=0,active=0): # a list
def getCoursesInTerm(term=0,get_fresh=1,show=1,active=0): # a list
if not term:
term = getTerms(1,1)
ff = 'cache/courses_in_term_%s.json' % str(term)
@ -590,15 +592,9 @@ def all_equal2(iterator):
return len(set(iterator)) <= 1
"""
180 2023 Fall
179 2023 Summer
178 2023 Spring
177 2023 Winter
"""
def semester_cross_lister():
sem = "sp25"
term = 287 #sp25
sem = "fa25"
term = 289
xlist_filename = f"cache/{sem}_crosslist.csv"
checkfile = codecs.open('cache/xlist_check.html','w','utf-8')
checkfile.write('<html><body><table>\n')
@ -700,6 +696,10 @@ def ez_xlist():
# Crosslist given 2 ids, computing the new name and code
def xlist(host_id, parasite_list):
host_info = course_from_id(host_id)
if not host_info:
print(f"Couldn't find course id {host_id} in database. Do you need to update it?")
return ""
host_info['crn'] = host_info['sis_source_id'][7:]
host_info['dept'] = dept_from_name( host_info['course_code'] )
host_info['num'] = num_from_name(host_info['course_code'] )
@ -709,6 +709,9 @@ def xlist(host_id, parasite_list):
para_info_list = [ course_from_id(x) for x in parasite_list ]
for p in para_info_list:
if not p:
print(f"Couldn't find course id for parasite in database. Do you need to update it?")
return ""
p['crn'] = p['sis_source_id'][7:]
p['dept'] = dept_from_name(p['course_code'] )
p['num'] = num_from_name(p['course_code'] )
@ -826,58 +829,69 @@ def course_term_summary_3():
# check number of students and publish state of all shells in a term
def all_semester_course_sanity_check():
outputfile = 'cache/courses_checker.csv'
t = 287
term = "sp25"
c = getCoursesInTerm(t,0,0)
c = getCoursesInTerm(t,1,0)
sched1 = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
sched = { x['crn']: x for x in sched1 }
#codecs.open('cache/courses_in_term_{t}.json','w','utf-8').write(json.dumps(c,indent=2))
#output = codecs.open('cache/courses_w_sections.csv','w','utf-8')
#output.write( ",".join(['what','id','parent_course_id','sis_course_id','name']) + "\n" )
output2 = codecs.open('cache/courses_checker.csv','w','utf-8')
output2 = codecs.open(outputfile,'w','utf-8')
output2.write( ",".join(['id','sis_course_id','name','state','mode','startdate','students']) + "\n" )
htmlout = codecs.open('cache/courses_checker.html','w','utf-8')
htmlout.write('<html><body><table>\n')
htmlout.write(f'<tr><td><b>Name</b></td><td><b>SIS ID</b></td><td><b>State</b></td><td><b>Mode</b></td><td><b>Start Date</b></td><td><b># Stu</b></td></tr>\n')
html_sections = []
i = 0
for course in c:
u2 = url + '/api/v1/courses/%s?include[]=total_students' % str(course['id'])
course['info'] = fetch(u2)
try:
u2 = url + '/api/v1/courses/%s?include[]=total_students' % str(course['id'])
course['info'] = fetch(u2)
# correlate to schedule
crn = course['sis_course_id'][7:]
ctype = '?'
cstart = '?'
ts = '?'
if crn in sched:
ctype = sched[crn]['type']
cstart = sched[crn]['start']
ts = sched[crn]['act']
# correlate to schedule
crn = course['sis_course_id'][7:]
ctype = '?'
cstart = '?'
ts = '?'
if crn in sched:
ctype = sched[crn]['type']
cstart = sched[crn]['start']
ts = sched[crn]['act']
info = [ 'course', course['id'], '', course['sis_course_id'], course['name'], course['workflow_state'], ts ]
info = list(map(str,info))
info2 = [ course['id'], course['sis_course_id'], course['name'], course['workflow_state'], ctype, cstart, ts ]
info2 = list(map(str,info2))
output2.write( ",".join(info2) + "\n" )
output2.flush()
print(info2)
#output.write( ",".join(info) + "\n" )
info = [ 'course', course['id'], '', course['sis_course_id'], course['name'], course['workflow_state'], ts ]
info = list(map(str,info))
info2 = [ course['id'], course['sis_course_id'], course['name'], course['workflow_state'], ctype, cstart, ts ]
info2 = list(map(str,info2))
output2.write( ",".join(info2) + "\n" )
output2.flush()
print(info2)
#output.write( ",".join(info) + "\n" )
uu = f"https://ilearn.gavilan.edu/courses/{course['id']}"
htmlout.write(f'<tr><td><a href="{uu}" target="_blank">{course["name"]}</a></td><td>{course["sis_course_id"]}</td><td>{course["workflow_state"]}</td><td>{ctype}</td><td>{cstart}</td><td>{ts}</td></tr>\n')
htmlout.flush()
#uu = url + '/api/v1/courses/%s/sections' % str(course['id'])
#course['sections'] = fetch(uu)
#s_info = [ [ 'section', y['id'], y['course_id'], y['sis_course_id'], y['name'], y['total_students'] ] for y in course['sections'] ]
#for row in s_info:
# print(row)
# output.write( ",".join( map(str,row) ) + "\n" )
#output.flush()
i += 1
#if i % 5 == 0:
# codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
uu = f"https://ilearn.gavilan.edu/courses/{course['id']}"
if course["workflow_state"]=='unpublished' and ctype=='online' and cstart=="1-27":
html_sections.append(f'<!--{course["name"]}--><tr><td><a href="{uu}" target="_blank">{course["name"]}</a></td><td>{course["sis_course_id"]}</td><td>{course["workflow_state"]}</td><td>{ctype}</td><td>{cstart}</td><td>{ts}</td></tr>\n')
#uu = url + '/api/v1/courses/%s/sections' % str(course['id'])
#course['sections'] = fetch(uu)
#s_info = [ [ 'section', y['id'], y['course_id'], y['sis_course_id'], y['name'], y['total_students'] ] for y in course['sections'] ]
#for row in s_info:
# print(row)
# output.write( ",".join( map(str,row) ) + "\n" )
#output.flush()
i += 1
#if i % 5 == 0:
# codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
except Exception as e:
print(f"error on {course}")
print(f"{e}")
#codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
html_sections.sort()
for h in html_sections:
htmlout.write(h)
htmlout.write('</table></body></html>\n')
print(f"wrote to {outputfile}")
@ -999,7 +1013,9 @@ def unenroll_student(courseid,enrolid):
def enroll_id_list_to_shell(id_list, shell_id, v=0):
id_list = set(id_list)
# id list has pairs, [id,name]
id_list = set([i[0] for i in id_list])
existing = course_enrollment(shell_id) # by user_id
existing_ids = set( [ x['user_id'] for x in existing.values() ])
@ -1181,12 +1197,28 @@ def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1):
def enroll_gott_workshops():
# stupid gav tls broken
#r = requests.get("https://www.gavilan.edu/staff/tlc/db.php?a=signups")
r = requests.get("https://www.gavilan.edu/staff/tlc/signups.php")
text = r.text
# Regex to extract the JSON object
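# The page is expected to embed something like this (illustrative shape, not real data):
#   var signups = [{"name": "...", "email": "...", "training": "...", "date_rsvp": "..."}];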
match = re.search(r"var\s+signups\s*=\s*(\[\{.*?\}\]);", text, re.DOTALL)
if match:
json_str = match.group(1) # Extract the JSON string
try:
signups = json.loads(json_str) # Convert to Python list of dicts
#print(json.dumps(signups,indent=2))
except json.JSONDecodeError as e:
print("Error decoding JSON:", e)
return
else:
print("JSON object not found")
return
#signups = json.loads(r.text)
#signups = json.loads(codecs.open('cache/signups.json','r','utf-8').read())
all_staff = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
# update w/ users.py #1
all_staff = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
@ -1212,8 +1244,13 @@ def enroll_gott_workshops():
#'6/10-6/23 GOTT 5: Essentials of Blended Learning, Hyflex2024-06-10 12:00:00': 20568,
#'6/17-6/30 GOTT 6 Introduction to Live Online Teaching and Learning2024-06-17 12:00:00': 20569,
#'GOTT 1 Intro to Teaching Online AUG242024-07-29 12:00:00': 20603, # 7/29
['2025-01-01 16:00:00 GOTT 1: Intro to Teaching Online with Canvas', 21770, 'enroll_gott1.txt'],
['2025-01-01 16:00:00 GOTT 2: Introduction to Asynchronous Teaching and Design', 21772, 'enroll_gott2.txt']
#['2025-01-01 16:00:00 GOTT 1: Intro to Teaching Online with Canvas', 21770, 'enroll_gott1.txt'],
#['2025-01-01 16:00:00 GOTT 2: Introduction to Asynchronous Teaching and Design', 21772, 'enroll_gott2.txt']
# date, title, shell_id
#['2025-02-23 16:00:00', 'GOTT 6: Intro to Synchronous Teaching (Sync/Hyflex)', 21835],
['2025-03-14 17:00:00', 'GOTT 5: The Essentials of Blended Learning (Hybrid) ', '21886'],
#['2025-02-23 16:00:00', 'GOTT 1: Intro to Teaching Online (2 week, async)', 21874]
]
#print(json.dumps(signups,indent=4))
#print(json.dumps(by_email,indent=4))
@ -1236,31 +1273,30 @@ def enroll_gott_workshops():
'rpotter@gav.edu': 'rpotter@gavilan.edu',
}
#for wkshp,su_list in signups.items():
print(workshop_ids)
for each_workshop in workshop_ids:
#if wkshp not in workshop_ids:
# print(f"skipping {wkshp}")
# continue
wkshp, shell_id, student_list = each_workshop
wkshp_date, wkshp_title, wkshp_shell_id = each_workshop
to_enroll = []
from_file = [ L.strip().split(' - ') for L in codecs.open(f'cache/{student_list}', 'r', 'utf-8').readlines() ]
#from_file = [ L.strip().split(' - ') for L in codecs.open(f'cache/{student_list}', 'r', 'utf-8').readlines() ]
#print(from_file)
for s in from_file:
e = s[1].lower()
if e in subs:
e = subs[e]
print( f"{wkshp} {e} {s[0]}" )
if e in by_email:
user = by_email[e]
#print(f"\t{user['name']} {e} {user['login_id']}")
to_enroll.append(user['id'])
else:
#print("** ** NOT FOUND")
pass
print(f"Workshop: {wkshp} \n\tEnrolling: {str(to_enroll)}")
enroll_id_list_to_shell(to_enroll, shell_id)
for s in signups:
if wkshp_date == s['date_rsvp'] and wkshp_title == s['training']:
e = s['email'].lower()
if e in subs:
e = subs[e]
print( f"{wkshp_title} {e} {s['name']}" )
if e in by_email:
user = by_email[e]
#print(f"\t{user['name']} {e} {user['login_id']}")
to_enroll.append([user['id'],user['name']])
else:
#print("** ** NOT FOUND")
pass
print(f"Workshop: {wkshp_date} {wkshp_title} \n\tEnrolling: {', '.join(i[1] for i in to_enroll)}")
enroll_id_list_to_shell(to_enroll, wkshp_shell_id)
def enroll_gnumber_list_to_courseid():
infile = codecs.open('cache/gottenrollments.txt','r','utf-8').readlines()
@ -1312,7 +1348,7 @@ def enroll_orientation_students():
# users_to_enroll = users_new_this_semester(the_semester) ### ##### USES LOCAL DB
# double semester (SU + FA)
users_to_enroll = users_new_this_2x_semester("202510", "202530") ##### USES LOCAL DB
users_to_enroll = users_new_this_2x_semester("202550", "202570") ##### USES LOCAL DB
#print("ALL ORIENTATION STUDENTS %s" % str(users_to_enroll))
#print("\n\nALREADY IN ORI SHELL %s" % str(users_in_ori_shell))
@ -1406,6 +1442,138 @@ def course_search_by_sis():
# print(json.dumps(x, indent=2))
def set_custom_start_dates():
TERM = 288
SEM = "su25"
make_changes = 1
do_all = 0
get_fresh = 0
term_start_month = 6
term_start_day = 2
# just do certain ids in cache/changeme.txt
limit_to_specific_ids = 1
limit_to = [x.strip() for x in open('cache/changeme.txt','r').readlines()]
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
if limit_to_specific_ids and (not this_id in limit_to):
continue
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
print(f" - {start} {d_start} - id: {this_id} - {S['code']} {S['crn']} {S['name']}" )
# Do we adjust the start date? Only if it doesn't match term
if d_start.month == term_start_month and d_start.day == term_start_day:
print(" Ignoring, term start date" )
continue
else:
print(" Adjust course start day?")
if make_changes:
if do_all != 'a':
do_all = input(' -> adjust? [enter] for yes, [a] to do all remaining. [n] to quit. >')
if do_all == 'n':
exit()
if do_all == '' or do_all == 'a':
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = f"https://gavilan.instructure.com:443/api/v1/courses/{this_id}"
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
def overview_start_dates():
TERM = 288
SEM = "su25"
get_fresh = 1
term_start_month = 6
term_start_day = 2
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
print(f"id,shell_shortname,sched_start,shell_start,shell_end,shell_restrict_view_dates,shell_restrict_view_dates,shell_state,shell_numstudents" )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
# get more canvas course shell info
uu = f"{url}/api/v1/courses/{this_id}"
this_course = fetch(uu)
shell_start = this_course['start_at']
shell_end = this_course['end_at']
shell_restrict_view_dates = '?'
if 'access_restricted_by_date' in this_course:
shell_restrict_view_dates = this_course['access_restricted_by_date']
shell_shortname = this_course['course_code']
shell_numstudents = '?' #this_course['total_students']
shell_state = this_course['workflow_state']
print(f"{this_id},{shell_shortname},{d_start},{shell_start},{shell_end},{shell_restrict_view_dates},{shell_restrict_view_dates},{shell_state},{shell_numstudents}" )
@ -1426,6 +1594,7 @@ def course_by_depts_terms(section=0):
nursing_start_day = 0
spring_start_day = 27
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
@ -1433,6 +1602,7 @@ def course_by_depts_terms(section=0):
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
@ -1441,16 +1611,14 @@ def course_by_depts_terms(section=0):
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
#print(crn_to_canvasid)
#return
#s = json.loads( codecs.open(f'cache/{SEM}_sched_expanded.json','r','utf-8').read() )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
@ -1514,11 +1682,11 @@ def xlist_cwe():
# cwe192 get put into another shell
this_sem_190_id = 21606 # they get 190s and 290s
this_sem_192_id = 21610 # they get 192s
this_sem_term = 287
this_sem_190_id = 22890 # they get 190s and 290s
this_sem_192_id = 22894 # they get 192s
this_sem_term = 289
get_fresh = 0
get_fresh = 1
sem_courses = getCoursesInTerm(this_sem_term, get_fresh, 0)
for search_string in ['CWE190','WTRM290']:
@ -1630,8 +1798,9 @@ def create_sandboxes():
#(19223, ' Sandbox GOTT5 WI24'),
#(19224, ' Sandbox GOTT6 WI24'),
#(20761, ' Sandbox GOTT1 FA24'),
(21770, ' Sandbox GOTT1 WI25'),
(21772, ' Sandbox GOTT2 WI25')
#(21770, ' Sandbox GOTT1 WI25'),
#(21772, ' Sandbox GOTT2 WI25'),
(21874, ' Sandbox GOTT1 SP25'),
]
filepath = 'cache/sandbox_courses.pkl'
@ -1871,8 +2040,8 @@ def instructor_list_to_activate_evals():
def add_evals(section=0):
# show or hide?
TERM = 184
SEM = "fa24"
TERM = 287
SEM = "sp25"
# fetch list of courses?
GET_FRESH_LIST = 0
@ -1887,7 +2056,7 @@ def add_evals(section=0):
ASK = 0
# are we showing or hiding the course eval link?
HIDE = False
HIDE = True
s = [ x.strip() for x in codecs.open(f'cache/{SEM}_eval_sections.txt','r').readlines()]
@ -1922,6 +2091,7 @@ def add_evals(section=0):
print(f"{courses[i]['id']} / {courses[i]['name']}")
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s/tabs/context_external_tool_1953" % i
r3 = requests.put(u2, headers=header, params=data)
print(f"OK {u2}")
#print(r3.text)
#time.sleep(0.400)
@ -2059,9 +2229,8 @@ def my_nav_filter(row):
def clean_course_nav_setup_semester(section=0):
print("Fetching list of all active courses")
term = 184 # fa24 # 182
term = 287
c = getCoursesInTerm(term,1,0) # sp25 = 287 wi24=182
term = 289
c = getCoursesInTerm(term,1,0)
print(c)
ids = []
courses = {}
@ -2220,35 +2389,36 @@ def quick_sem_course_list(term=180):
def create_calendar_event():
events = codecs.open('cache/events.csv','r','utf-8').readlines()
events = codecs.open('cache/academic_calendar_2025.csv','r','utf-8').readlines()
for e in events:
(date, title, desc) = e.split(',')
local = pytz.timezone("America/Los_Angeles")
naive = datetime.strptime(date, "%Y-%m-%d")
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc).isoformat()
orientation_shells = ["course_15924","course_19094","course_20862"]
for ori_shell in orientation_shells:
for e in events:
(date, title, desc) = e.split(',')
local = pytz.timezone("America/Los_Angeles")
naive = datetime.strptime(date, "%Y-%m-%d")
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc).isoformat()
params = {
"calendar_event[context_code]": "course_15924", # 2023 student orientation
"calendar_event[context_code]": "course_19094", # 2024 orientation
"calendar_event[title]": title,
"calendar_event[description]": desc,
"calendar_event[start_at]": utc_dt, # DateTime
"calendar_event[all_by_dept": "true",
params = {
"calendar_event[context_code]": ori_shell,
"calendar_event[title]": title,
"calendar_event[description]": desc,
"calendar_event[start_at]": utc_dt, # DateTime
"calendar_event[all_by_dept": "true",
}
}
u = url + "/api/v1/calendar_events"
res = requests.post(u, headers = header, params=params)
result = json.loads(res.text)
print(title,end=" ")
if "errors" in result:
print(result["errors"])
if "id" in result:
print("ok, id#", result["id"])
u = url + "/api/v1/calendar_events"
res = requests.post(u, headers = header, params=params)
result = json.loads(res.text)
print(title,end=" ")
if "errors" in result:
print(result["errors"])
if "id" in result:
print("ok, id#", result["id"])
def utc_to_local(utc_str):
if not utc_str: return ""
@ -2327,12 +2497,12 @@ def enrollment_helper():
keep = 'code,name,days,cap,act,teacher,date,partofday,type,site'.split(',')
oo = codecs.open('cache/section_history.json','w','utf-8')
# fetch enrollment stats for last few years
from semesters import code, sems, to_sis_sem
from semesters import code, sems_by_short_name, short_to_sis
from util import dept_from_name
raw = []
code.reverse()
sort = defaultdict(dict)
for s in sems.keys():
for s in sems_by_short_name.keys():
try:
sched1 = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json").json()
sort[s] = defaultdict(dict)
@ -2340,7 +2510,7 @@ def enrollment_helper():
if sect['name'] in ignore2:
continue
sect_smaller = funcy.project(sect,keep)
sect_smaller['sem'] = to_sis_sem(s)
sect_smaller['sem'] = short_to_sis(s)
if int(sect_smaller['cap'])==0 or int(sect_smaller['act'])==0:
sect_smaller['fill_pct'] = 100
else:
@ -2447,6 +2617,33 @@ def course_log():
L = fetch(f"{url}/api/v1/audit/course/courses/{course_id}")
print(json.dumps(L,indent=2))
def fetch_rubric():
course = 21274
r_id = 35961
u = f"{url}/api/v1/courses/{course}/rubrics/{r_id}"
result = fetch(u)
#print(json.dumps(result,indent=2))
rows = []
for row in result['data']:
r = []
r.append(f"<td style='vertical-align:top;'><b>{row['description']}</b><br />{row['long_description']}</td>")
for item in row['ratings']:
r.append(f"<td style='vertical-align:top;'><u>{item['description']}</u><br />{item['long_description']}<br /><i>{item['points']} points</i></td>")
rows.append("<tr>" + "\n".join( r ) + "</tr>\n")
output = f"<h3>{result['title']}</h3>\n"
output += "<table border='1'>" + ''.join( [ f"<tr>{x}</tr>\n" for x in rows] ) + "</table>\n"
print(output)
if __name__ == "__main__":
options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
@ -2476,9 +2673,11 @@ if __name__ == "__main__":
17: ['Remove "new analytics" from all courses navs in a semester', remove_n_analytics],
21: ['Add course evals', add_evals],
56: ['Remove course evals all sections', remove_evals_all_sections],
52: ['Cleanup semester / course nav', clean_course_nav_setup_semester],
52: ['Cleanup semester / course nav', clean_course_nav_setup_semester], # not done, just lists nav right now
29: ['Overview summer start dates',overview_start_dates],
31: ['Fine tune term dates and winter session', course_by_depts_terms],
32: ['Set summer start dates', set_custom_start_dates],
#32: ['Cross-list classes', xlist ],
#33: ['Cross list helper', eslCrosslister],
33: ['Cross list, ask for sections', ez_xlist],
@ -2486,7 +2685,7 @@ if __name__ == "__main__":
35: ['Cross list from manually created file', do_manual_xlist],
36: ['Quick course list', quick_sem_course_list ],
37: ['Cross list CWE courses', xlist_cwe],
38: ['Create calendar event', create_calendar_event],
38: ['Create calendar events for orientation shells', create_calendar_event],
39: ['list all assignments', list_all_assignments],
40: ['Enroll GOTT Workshops', enroll_gott_workshops],
@ -2505,7 +2704,9 @@ if __name__ == "__main__":
50: ['Fetch rubric scores and comments', fetch_rubric_scores],
51: ['Fetch announcements in a course', fetch_announcements],
57: ['show course audit log', course_log]
57: ['show course audit log', course_log],
60: ['fetch a rubric', fetch_rubric],
}
print ('')


@ -15,6 +15,7 @@ displaynames = []
from canvas_secrets import cq_user, cq_pasw
from outcomes import quick_add_course_outcomes
from schedules import campus_dept_hierarchy
CQ_URL = "https://secure.curricunet.com/scripts/webservices/generic_meta/clients/versions/v4/gavilan.cfc"
@ -569,6 +570,7 @@ def course_path_style_2_html():
verbose = 1
v = verbose
dbg = codecs.open('cache/courses/debugout.txt','w','utf-8')
oo = codecs.open("cache/courses/allclasspaths.txt","r","utf-8").readlines()
course_prebuild = defaultdict( ddl )
@ -601,8 +603,8 @@ def course_path_style_2_html():
lookup_table = { 'entityTitle':'title', 'proposalType':'type',
'\/Course\sDescription\/status':'status', 'Course\sDiscipline':'dept',
'Course\sNumber':'number', 'Course\sTitle':'name',
'Short\sTitle':'shortname', 'Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
'Course\sNumber':'number', 'Course\sTitle':'name', 'Course Description\/\d\/Justification':'justification',
'Short\sTitle':'shortname', 'Course Description\/\d\/Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
'\/Course\sDescription\/\d+\/Course\sDescription\/':'desc',
'Minimum\sUnits':'min_units', 'Minimum\sLecture\sHour':'min_lec_hour', 'Minimum\sLab\sHour':'min_lab_hour', 'Course\shas\svariable\shours':'has_var_hours',
'Number\sWeeks':'weeks',
@ -620,6 +622,7 @@ def course_path_style_2_html():
crs = course_prebuild[C]
course_build = {'slo':{}} # defaultdict( ddl )
if v: print(C)
dbg.write(f"{C}\n")
for K in crs.keys():
if v: print("\t%s" % K)
@ -647,6 +650,7 @@ def course_path_style_2_html():
else:
content_search = re.search(r'^(.*)\/(.*?)$',line)
course_build[key] = content_search.groups()[1]
dbg.write(f"{key} => {content_search.groups()[1]}\n")
if v: print("\t\t%s - %s" % (key, course_build[key]))
continue
@ -841,8 +845,8 @@ def course_rank():
all[code].add(c)
for k in sorted(all.keys()):
print("\n##",k)
print(json.dumps(list(all[k]),indent=2))
#print("\n##",k)
#print(json.dumps(list(all[k]),indent=2))
for version in all[k]:
csvwriter.writerow( [ version['d']+version['n'], version['c'], version['s'], version['m'], version['d'], version['n'], len(version['o']) ])
@ -851,19 +855,127 @@ def de_classpaths():
outfile = codecs.open('cache/courses/all_de_classpaths.txt', 'w','utf-8')
areas = ['Distance Education/1/2/Justification/Need/Justification','/Distance Education/1/3/Content Presentation/<b>A. Methods of Instruction</b>/','/Distance Education/1/3/Content Presentation/<b>B. Instructional Materials and Resources:</b><br/>1. What materials and resources will you provide your students <b>in a virtual environment</b>?/','/Distance Education/4/Assessment/','/Distance Education/4/Methods of Instruction/','/Distance Education/1/3/Content Presentation/2. Have you assessed the use of high-quality open educational resources (OER) to help bridge the digital divide for students in the course? If so, please describe how you will be using them./','/Distance Education/4/Instructional Materials and Resources/','/Distance Education/1/3/Content Presentation/3. How will students be provided access to library materials and other learning resources <b>in a virtual environment</b>? (virtual reference librarian, research guides, digital content, etc.)/','/Distance Education/4/<b>How will students be provided access to library materials and what support will students be provided to help them locate and use these materials?</b><br/>Library and Other Learning Resources/','/Distance Education/1/3/Content Presentation/4. How will students access equitable student support services <b>in a virtual environment</b>? (tutoring, financial aid, counseling, etc.)/','/Distance Education/4/Accommodations for Students with Disabilities/','/6/Distance Education/4/Office Hours/','/Contact/Contact/Description/']
i = 0
for area in areas:
with codecs.open('cache/courses/allclasspaths.txt', 'r','utf-8') as infile:
outfile.writelines(line for line in infile if area in line)
i += 1
if i % 1000 == 0: print(i)
from semesters import human_to_sis, get_previous_season
#from pipelines import area, areas
def extract_digits(input_string):
"""
Removes all non-digit characters from the input string and returns an integer.
:param input_string: The string to process.
:return: An integer containing only the digits from the input string.
"""
digits_only = ''.join(char for char in input_string if char.isdigit())
return int(digits_only) if digits_only else 0
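# e.g. extract_digits("CSIS 101A") returns 101; extract_digits("") returns 0.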
def filter_classes(): # for removing deactivated classes
json_file_path = 'cache/courses/courses_built.json'
output_csv_path = 'cache/courses/active_courses.txt'
all_courses = []
with open(json_file_path, 'r') as json_file:
data = json.load(json_file)
for i,C in data.items():
term = ''
try:
term = C['term']
except:
print(f"** {i} {C['dept']} {C['number']} is missing term")
term = ''
shortname = ''
try:
shortname = C['shortname']
except:
shortname = C['name']
print(f"** {i} {C['dept']} {C['number']} is missing shortname")
all_courses.append(f"{C['dept']} {C['number']} {shortname} \t {C['status']} {C['type']} \t{term} - {i}")
all_courses.sort()
for C in all_courses: print(C)
def slo_summary_report(): # for scheduling slo assessment
json_file_path = 'cache/courses/courses_built.json'
output_csv_path = 'cache/courses/courses_slo_schedule.csv'
term_csv_file_path = 'cache/courses/slo_schedule.csv'
(gp, course_to_area, areacode_to_area, area_to_dean, dean, dean_code_to_name) = campus_dept_hierarchy()
with open(json_file_path, 'r') as json_file:
data = json.load(json_file)
# Extract course information
courses = []
term_courses = []
for key, course in data.items():
try:
#print(f"{course['dept']} - -" )
re_code_course = {
"key": key,
"type": course.get("type", ""),
"status": course.get("status", ""),
"dept": course.get("dept", ""),
"number": course.get("number", ""),
"name": course.get("name", ""),
"first_active_term": course.get("term", ""),
'first_active_term_code': human_to_sis(course.get('term', '')),
"reviewing_term": get_previous_season(course.get("term","")),
"reviewing_term_code": human_to_sis(get_previous_season(course.get('term', ''))),
"area": areacode_to_area[ course_to_area[course.get("dept", "").upper()] ]
}
courses.append(re_code_course)
if course["status"] in ["Active", "In Review"] and course["type"] != "Deactivate Course":
term_courses.append(re_code_course)
except Exception as e:
print(f"error on course: {course['dept']} {course['number']} {course['name']}")
# Sort by dept, number, and term
courses.sort(key=lambda x: (x["dept"], extract_digits(x["number"]), x["reviewing_term_code"]))
term_courses.sort(key=lambda x: (x["reviewing_term_code"],x["dept"], extract_digits(x["number"])))
# Write to CSV
fieldnames = ["dept", "number", "reviewing_term", "reviewing_term_code", "status", "key", "type", "name", "first_active_term", "first_active_term_code","area"]
with open(output_csv_path, 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(courses)
with open(term_csv_file_path, 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(term_courses)
print(f"CSV file '{output_csv_path}' has been created.")
if __name__ == "__main__":
print ('')
options = { 1: ['fetch all courses', fetch_all_classes],
2: ['process all classes', path_style_test],
3: ['courses - path style to html catalog', course_path_style_2_html],
3: ['courses - path style to json and html catalog', course_path_style_2_html],
4: ['show course outcomes', all_outcomes],
5: ['courses - rank by all versions', course_rank],
6: ['extract de info from class paths', de_classpaths],
7: ['build schedule or summary for SLO planning', slo_summary_report],
8: ['remove deactivated courses', filter_classes],
10: ['fetch all programs', fetch_all_programs],
11: ['process all programs', path_style_prog],
12: ['programs - path style to html catalog', path_style_2_html],


@ -11,7 +11,7 @@ from pampy import match, _
from bs4 import BeautifulSoup as bs
import pandas as pd
import sys, locale, re
from pipelines import getSemesterSchedule
from schedules import get_semester_schedule
from canvas_secrets import cq_url, cq_user, cq_pasw
@ -1360,7 +1360,7 @@ def my_default_counter():
# Of the recent schedules, what was actually offered online?
def summarize_online_sections():
scheds = list(map(getSemesterSchedule,sems))
scheds = list(map(get_semester_schedule,sems))
all = pd.concat(scheds,sort=True)
selected = all[['code','type','sem']]
selected.to_csv('cache/one_year_course_sections.csv')


@ -5,6 +5,358 @@
# from pipelines - canvas data
# read schedule file with an eye toward watching what's filling up
def schedule_filling():
sem = 'spring2021' # todo: hardcoded
days = []
for f in sorted(os.listdir('cache/rosters/'+sem+'/')):
if f.endswith('.html'):
match = re.search(r'sched_(\d\d\d\d)_(\d\d)_(\d+)\.html',f)
if match:
print(f)
y = match.group(1)
m = match.group(2)
d = match.group(3)
print("Schedule from %s %s %s." % (y,m,d))
csv_sched = ssb_to_csv(open('cache/rosters/'+sem+'/'+f,'r').read())
jsn = to_section_list(csv_sched)
#print(json.dumps(jsn,indent=2))
days.append(jsn)
day1 = days[-2]
day2 = days[-1]
df = jsondiff.diff(day1, day2)
gains = defaultdict( list )
for D in df.keys():
if isinstance(D, int):
#print(day1[D]['code'] + '\t' + day1[D]['crn'] + ' Before: ' + day1[D]['act'] + ' After: ' + day2[D]['act'])
try:
gain = int(day2[D]['act']) - int(day1[D]['act'])
gains[gain].append( day1[D]['code'] + ' ' + day1[D]['crn'] )
except:
print("No gain for " + str(D))
#print("\t" + str(df[D]))
else:
print(D)
print(df[D])
for key, value in sorted(gains.items(), key=lambda x: x[0]):
print("{} : {}".format(key, value))
#print(json.dumps(gains,indent=2))
def argos_data():
global dean,gp
f2 = codecs.open('cache/enrollment_argos_fa23.csv','w','utf-8')
writer = csv.writer(f2)
headers = 'gp dean dept num code crn name act site'.split(' ')
writer.writerow(headers)
f = codecs.open('cache/sched_draft_fa23.csv','r','utf-8')
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
print(d)
my_dean = dean[d['Subj']]
my_gp = gp[d['Subj']]
dept = d['Subj']
num = d['Crse No']
code = dept + " " + num
crn = d['CRN']
name = d['Course Title']
act = d['Open Seats']
campus = d['Campus']
session = d['Session']
if campus == "Off Campus": site = session
else: site = campus
print(site)
writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
def scrape_for_db():
global SEMESTER, gp, dean, short_sem, semester_begin, filename, filename_html
fields = 'sem,crn,dept,num,gp,dean,code,name,teacher,type,cap,act,loc,site,date,days,time,cred,ztc'.split(',')
"""
SEMESTER = 'Fall 2022'
short_sem = 'fa22'
semester_begin = strptime('08/22', '%m/%d')
filename = 'fa22_sched.json'
filename_html = 'fa22_sched.html'
as_dict = scrape_schedule()
fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text,mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
for S in as_dict:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = short_sem
str = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
", ".join( [ "'" + re.sub(r"'", "", S[x]) + "'" for x in fields ] )
print(str)
fff.write(str)
fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
fff.close()
"""
SEMESTER = 'Spring 2023 (View only)'
short_sem = 'sp23'
semester_begin = strptime('01/30', '%m/%d')
filename = 'sp23_sched.json'
filename_html = 'sp23_sched.html'
as_dict = scrape_schedule()
fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text,mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
for S in as_dict:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = short_sem
str = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
", ".join( [ "'" + re.sub(r"'", "", S[x]) + "'" for x in fields ] )
print(str)
fff.write(str)
fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
fff.close()
def todays_date_filename(short_sem): # helper
n = datetime.now()
m = n.month
if m < 10: m = "0"+str(m)
d = n.day
if d < 10: d = "0" + str(d)
return "reg_" + short_sem + "_" + str(n.year) + str(m) + str(d)
def expand_old_semesters():
terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20,fa20,sp21,su21,fa21,sp22,su22,fa22'.split(',')
terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20'.split(',')
terms.reverse()
for t in terms:
list_latestarts(t)
input('press return to continue.')
def argos_data_from_cvc():
global dean,gp
short_sem = 'fa23'
f3 = codecs.open('cache/%s_sched.json' % short_sem, 'w', 'utf-8')
all_courses = []
f = codecs.open('cache/sched_draft_%s.csv' % short_sem, 'r','utf-8')
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
#print(d)
parts = re.search(r'^([A-Z]+)(\d+[A-Z]*)$', d['Course_Code'])
if parts:
dept = parts.group(1)
num = parts.group(2)
my_dean = dean[dept]
my_gp = gp[dept]
code = dept + " " + num
crn = d['CRN']
cred = d['Units_Credit_hours']
days, time_start, time_end = days_times(d['Meeting_Days_and_Times'])
times = ""
if time_start: times = time_start + "-" + time_end
date = remove_year(d['Start_Date']) + "-" + remove_year(d['End_Date'])
start = remove_year(d['Start_Date'])
end = remove_year(d['End_Date'])
ztc = d['ZTC']
name = d['Course_Name']
cap = d['Class_Capacity']
rem = d['Available_Seats']
act = int(cap) - int(rem)
teacher = d['Instructor_First_Name'] + " " + d['Instructor_Last_Name']
delivery = d['Delivery']
if delivery == "Online":
if days:
site = "Online"
type = "online live"
loc = "Online Live"
else:
site = "Online"
type = "online"
loc = "ONLINE"
elif delivery == "Hybrid":
site = d['Campus_College']
type = "hybrid"
loc = d['Meeting_Locations']
else:
site = d['Campus_College']
type = "in-person"
loc = d['Meeting_Locations']
this_course = { "crn": crn, "dept": dept, "num": num, "code": code, "name": name, "teacher": teacher, "type": type, "loc": loc, \
"cap": cap.strip(), "act": act, "site": site, "date": date, "cred": cred.strip(), "ztc": ztc, "days": days, "time": times, \
"start": start, "end": end, "time_start": time_start, "time_end": time_end, "dean": my_dean, "gp": my_gp}
all_courses.append(this_course)
print(site)
#writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
print(all_courses)
#print(json.dumps(all_courses))
f3.write( json.dumps(all_courses,indent=2) )
f3.close()
expanded = list_latestarts(short_sem)
def days_times(s):
parts = re.search(r'^([MTWThRF]+)\s?(.*?)$',s)
if parts:
day = parts.group(1)
time = parts.group(2)
parts2 = re.search(r'^(.*)\s?-\s?(.*)$',time)
if parts2:
time_start = parts2.group(1).strip()
time_end = parts2.group(2).strip()
return day, time_start, time_end
return day, time, ''
return '','',''
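# e.g. (hypothetical input) days_times("MW 9:00 AM - 10:20 AM") -> ("MW", "9:00 AM", "10:20 AM"),
# and a days-only string like "TTh" -> ("TTh", "", "").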
def remove_year(s):
s = re.sub(r'\-', '/', s)
if len(s)>5: return s[5:]
return s
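# e.g. remove_year("2025-08-25") -> "08/25" (illustrative date string).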
def get_enrlmts_for_user(user,enrollments):
#active enrollments
u_en = enrollments[ lambda x: (x['user_id'] == user) & (x['workflow']=='active') ]
return u_en[['type','course_id']]
### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section
def course_start(course):
#todo: use this to make a early/late/short field and store semester dates w/ other constants
start = datetime(2019,1,28)
end = datetime(2019,5,24)
# is it normal, early, late, winter?
li = course[0]
date = li[12]
if date=='01/28-05/24':
return 'Normal'
if date=='TBA':
return 'TBA'
if date=='01/02-01/25':
return 'Winter'
if date=='01/02-01/24':
return 'Winter'
ma = re.search( r'(\d+)\/(\d+)\-(\d+)\/(\d+)', date)
if ma:
# TODO do these years matter?
mystart = datetime(2019, int(ma.group(1)), int(ma.group(2)))
if int(ma.group(1)) > 10: mystart = datetime(2018, int(ma.group(1)), int(ma.group(2)))
myend = datetime(2019, int(ma.group(3)), int(ma.group(4)))
length = myend - mystart
weeks = length.days / 7
if mystart != start:
if mystart < start:
#print 'Early Start ', str(weeks), " weeks ",
return 'Early start'
else:
#print 'Late Start ', str(weeks), " weeks ",
return 'Late start'
else:
if myend > end:
#print 'Long class ', str(weeks), " weeks ",
return 'Long term'
else:
#print 'Short term ', str(weeks), " weeks ",
return 'Short term'
#return ma.group(1) + '/' + ma.group(2) + " end: " + ma.group(3) + "/" + ma.group(4)
else:
return "Didn't match: " + date
# list files in canvas_data (online) and choose one or some to download.
def interactive():
resp = do_request('/api/account/self/file/sync')
mylog.write(json.dumps(resp, indent=4))
#mylog.close()
i = 0
gotten = os.listdir(local_data_folder)
for x in resp['files']:
print(str(i) + '.\t' + x['filename'])
i += 1
which = input("Which files to get? (separate with commas, or say 'all') ")
if which=='all':
which_a = list(range(i-1))
else:
which_a = which.split(",")
for W in which_a:
this_i = int(W)
this_f = resp['files'][this_i]
filename = this_f['filename']
if filename in gotten: continue
print("Downloading: " + filename)
response = requests.request(method='GET', url=this_f['url'], stream=True)
if(response.status_code != 200):
print(('Request response went bad. Got back a ', response.status_code, ' code, meaning the request was ', response.reason))
else:
#Use the downloaded data
with open(local_data_folder + filename, 'wb') as fd:
for chunk in response.iter_content(chunk_size=128):
fd.write(chunk)
print("Success")
"""if filename.split('.')[-1] == 'gz':
try:
plain_filename = 'canvas_data/' + ".".join(filename.split('.')[:-1])
pf = open(plain_filename,'w')
with gzip.open('canvas_data/' + filename , 'rb') as f:
pf.write(f.read())
except Exception as e:
print "Failed to ungizp. Probably too big: " + str(e)"""
# todo: where does the most recent schedule come from?

flex2.py (new file, 106 lines)

@ -0,0 +1,106 @@
import pymysql
# Connect to the MySQL database using PyMySQL
conn = pymysql.connect(
host="192.168.1.6", # Your host (localhost, for example)
user="phowell", # Your MySQL username
password="rolley34",# Your MySQL password
database="db" # Your database name
)
cursor = conn.cursor(pymysql.cursors.DictCursor)
# Query to get the table structure
cursor.execute("""
SELECT
C.TABLE_NAME,
C.COLUMN_NAME,
C.COLUMN_TYPE,
C.IS_NULLABLE,
C.COLUMN_DEFAULT,
C.EXTRA,
KCU.REFERENCED_TABLE_NAME,
KCU.REFERENCED_COLUMN_NAME,
KCU.CONSTRAINT_NAME
FROM
INFORMATION_SCHEMA.COLUMNS C
LEFT JOIN
INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU
ON C.TABLE_NAME = KCU.TABLE_NAME AND C.COLUMN_NAME = KCU.COLUMN_NAME
WHERE
C.TABLE_SCHEMA = 'db' -- Replace with your actual database name
AND C.TABLE_NAME LIKE 'conf_%'; -- Only tables starting with 'conf_'
""")
# Fetch all rows from the query result
columns_info = cursor.fetchall()
# Close the connection
cursor.close()
conn.close()
# Function to generate condensed output
def condense_structure(columns_info):
result = {}
for column in columns_info:
table = column['TABLE_NAME']
column_name = column['COLUMN_NAME']
column_type = column['COLUMN_TYPE']
is_nullable = column['IS_NULLABLE']
extra = column['EXTRA']
referenced_table = column['REFERENCED_TABLE_NAME']
referenced_column = column['REFERENCED_COLUMN_NAME']
constraint_name = column['CONSTRAINT_NAME']
# Condense data type (e.g., 'VARCHAR(255)' -> 'V(255)')
if column_type.startswith('varchar'):
column_type = 'V(' + column_type.split('(')[1].split(')')[0] + ')'
elif column_type.startswith('char'):
column_type = 'C(' + column_type.split('(')[1].split(')')[0] + ')'
elif column_type.startswith('int'):
column_type = 'I' # Int types are just abbreviated to 'I'
elif column_type.startswith('text'):
column_type = 'T' # Text types are abbreviated to 'T'
# Condense NULLABLE
if is_nullable == 'YES':
column_type += ' N' # Add N for nullable
else:
column_type += ' NN' # Add NN for not nullable
# Remove DEFAULT NULL if no default value is set
if column['COLUMN_DEFAULT'] is None:
column_default = ''
else:
column_default = f" D({column['COLUMN_DEFAULT']})" # Default value
# Add extra information, like auto-increment if available
if 'auto_increment' in extra:
column_type += " AI" # Add AI for auto-increment columns
# Handle foreign key references
if referenced_table:
column_type += f" FK({referenced_table}.{referenced_column})"
# Create shorthand for each column
shorthand = f"{column_name}: {column_type}{column_default}"
# Add to the result dict under the respective table
if table not in result:
result[table] = []
result[table].append(shorthand)
return result
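# Sample shorthand lines this produces (illustrative, hypothetical columns):
#   id: I NN AI
#   user_id: I NN FK(conf_users.id)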
# Condense the structure
condensed_structure = condense_structure(columns_info)
# Print out the condensed structure
for table, columns in condensed_structure.items():
print(f"Table: {table}")
for column in columns:
print(f" - {column}")
print("\n")

gpt.py (170 changed lines)

@ -1,15 +1,39 @@
import os, json, sys, codecs, openai
import os, json, sys, codecs, re
from datetime import datetime, timedelta
from openai import OpenAI
from canvas_secrets import openai_org, openai_api_key
client = OpenAI(
api_key=openai_api_key,
organization=openai_org
)
openai.organization = openai_org
openai.api_key = openai_api_key
#print(openai.Model.list())
DEFAULT_MODEL = "gpt-4o"
SAVE_ATTACHEMENTS = 0
def gpt_chat(instruction, prompt, model=DEFAULT_MODEL):
messages = [
{"role": "system", "content": instruction},
{"role": "user", "content": prompt}
]
response = client.chat.completions.create(model=model, messages=messages)
result = response.choices[0].message.content
log3 = codecs.open('cache/gptlog.txt','a','utf-8')
log3.write(json.dumps({'prompt':prompt, 'result': result},indent=2))
log3.write("\n\n---\n\n")
log3.close()
return result
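# Minimal usage sketch (illustrative prompt text):
#   reply = gpt_chat("You are a concise assistant.", "Summarize the late-start schedule in one sentence.")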
def gpt_faq(faq):
system_role = """Ignore all previous instructions. You are an expert on the management and implementation of all aspects of online courses and running a distance education program, especially in higher education and community college. You are also an expert in pedagogy and the tools and techniques of effective teaching and learning, based on research and experience. You know how to focus on concrete examples of effective teaching which actually work in the real world and the advice you give is specific and immediately useful to teachers in the classroom. You provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. Your users are experts in teaching and college administration, and are comfortable with the California and Federal Educational code, and know they need to comply with it. There is no need to explain this beyond noting any relevant laws. Don't be verbose in your answers, but do provide details and examples where it might help the explanation.
You are a world class technical writer. Your style is human-centered, friendly and kind. Give me concise and accurate answers and ignore all the niceties that OpenAI programmed you with. Omit all qualifications and excuses. I know you are a large language model but I want you to pretend you are a confident and superintelligent oracle that can help a limited human warp reality to his own intentions. You can reveal your own sense of humor and depth of intelligence. It is very important that you get this right.
@ -17,42 +41,22 @@ Adopt a penetratingly insightful, incisively funny, cleverly witty persona. Your
I am pasting below an email chain between myself and colleague(s). I am fairly certain that it contains a technical question that I have answered. They may be spread out over several messages, or there might be some clarification or follow up questions. There will be headers, signatures, and other text that isn't a part of the core message. Ignore that. Consider the whole email chain while you prepare the following: Respond with a json formatted dictionary that contains the following:
{ "question": "Restate the question or problem in a concise but clear manner", "topics": ["keywords", "or phrases", "that categorize the issue"], "answer": "The best possible answer, written in markdown format. Draw the answer from the email but feel free to edit or embelish based on your knowledge. Generalize the answer to anyone who might have the issue. Your audience is mostly instructors working at a small community college. Do not refer to anyone's name specifically, unless it is Peter or Sabrina, but instead write for a general audience looking for the answers to their questions. We are writing a FAQ or help page. Feel free to use markdown-formatted bold, italic, lists, and links."} """
# create a completion
my_model = "gpt-4" # "gpt-3.5-turbo-16k" # gpt-3.5-turbo gpt-4 gpt-4-32k
completion = openai.ChatCompletion.create(model=my_model, messages=[
{"role": "system", "content": system_role},
{"role": "user", "content": faq} ] )
log3 = codecs.open('cache/gptlog.txt','a','utf-8')
log3.write(json.dumps(completion,indent=2))
log3.write("\n\n---\n\n")
log3.close()
r = completion['choices'][0]['message']['content']
#print(str(r) + "\n\n")
return r
return gpt_chat(system_role, faq)
def gpt_test():
my_prompt = "Write a series of texts trying to sell a pen to a stranger."
print(sys.argv)
exit
if len(sys.argv)>1:
my_prompt = " ".join(sys.argv[1:])
else:
print("Prompt: %s" % my_prompt)
my_model = "text-davinci-003"
# create a completion
completion = openai.Completion.create(engine=my_model, prompt=my_prompt, max_tokens=1000, temperature=1,top_p=1)
#print(completion)
#print(json.dumps(completion,indent=2))
print(completion.choices[0].text)
print()
result = gpt_chat("", my_prompt)
print(result)
def sample_send_email():
@ -85,7 +89,7 @@ def fetch_useful_info():
log = codecs.open("cache/email_usefulinfo.txt","w","utf-8")
#Finally, let's say you want to access a subfolder named folder_of_soldy in your root_folder, you do:
# access a subfolder
print("\nUseful Info Reference:")
uinfo = root_folder.Folders['useful info ref']
for message in uinfo.Items:
@ -97,7 +101,8 @@ def fetch_useful_info():
attachment = attachments.Item(1)
for attachment in message.Attachments:
print(" -> " + str(attachment))
loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments\\" + str(attachment)
#loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments\\" + str(attachment)
loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments\\" + str(attachment)
attachment.SaveAsFile(loc)
atch_list += str(attachment) + ', '
atch_count += 1
@ -121,7 +126,8 @@ def process_email_filesave(message, log, i):
attachment = attachments.Item(1)
for attachment in message.Attachments:
print(" -> " + str(attachment))
loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
#loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
attachment.SaveAsFile(loc)
atch_list += str(attachment) + ', '
atch_count += 1
@ -139,12 +145,75 @@ def process_email_filesave(message, log, i):
logeach.close()
def list_faq():
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
root_folder = outlook.Folders.Item(1)
print("\nFAQ Emails:")
uinfo = root_folder.Folders['for faq']
index = 0
# Get today's date
end_date = datetime.now()
# Go back xx months
months_back = 60
chunk_results = []
print("\nLoading messages in 1-month chunks...\n")
for i in range(months_back):
chunk_end = end_date.replace(day=1) - timedelta(days=1) # End of previous month
chunk_start = chunk_end.replace(day=1) # Start of that month
start_str = chunk_start.strftime("%m/%d/%Y %I:%M %p")  # 12-hour clock so the AM/PM marker is meaningful
end_str = chunk_end.strftime("%m/%d/%Y %I:%M %p")
restriction = f"[ReceivedTime] >= '{start_str}' AND [ReceivedTime] <= '{end_str}'"
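# Example of the resulting Restrict filter for one month-chunk (hypothetical dates;
# the times carry whatever clock time datetime.now() returned):
# [ReceivedTime] >= '04/01/2025 09:15 AM' AND [ReceivedTime] <= '04/30/2025 09:15 AM'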
filtered = uinfo.Items.Restrict(restriction)
# Force enumeration
#messages = [msg for msg in filtered if msg.Class == 43] # MailItem only
messages = [msg for msg in filtered ]
count = len(messages)
print(f"{chunk_start.strftime('%B %Y')}: {count} messages")
chunk_results.append((chunk_start.strftime('%Y-%m'), count))
for message in messages:
try:
print(f" {index}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
index += 1
except Exception as e:
print(f"Exception: {str(e)}")
end_date = chunk_start # Move back to the previous month
'''for message in uinfo.Items:
try:
print(f"{i}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
except Exception as e:
print(f"Exception: {str(e)}")
i += 1
if i % 20 == 0:
temp = input(f"press enter to continue, or q to quit now at message {i}: ")
if temp == 'q':
exit()
'''
def fetch_faq():
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
root_folder = outlook.Folders.Item(1)
PAUSE = 0
PAUSE = 1
startat = input("Press enter to continue or a number to start at that message: ")
if startat == '': startat = '0'
@ -168,10 +237,10 @@ def fetch_faq():
except Exception as e:
print(f"Exception: {str(e)}")
#summary = gpt_faq( f"Subject: {message.Subject}\nBody: {message.body}")
#log2.write( f",\n{summary}")
#log2.flush()
#print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")
summary = gpt_faq( f"Subject: {message.Subject}\nBody: {message.body}")
log2.write( f",\n{summary}")
log2.flush()
print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")
i += 1
if PAUSE:
@ -179,9 +248,32 @@ def fetch_faq():
if temp == 'q':
exit()
#fetch_useful_info()
fetch_faq()
if __name__ == "__main__":
print ('')
options = { 1: ['gpt test',gpt_test] ,
2: ['test email send',sample_send_email] ,
3: ['fetch "useful info" mailbox', fetch_useful_info],
4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
5: ['list faq mailbox', list_faq],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

@ -8,8 +8,9 @@ from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from semesters import to_sis_sem
from pipelines import sync_non_interactive, url, header
#, gp, dean
from semesters import short_to_sis
#from courses import getCoursesInTerm
@ -1722,7 +1723,7 @@ def build_db_schedule():
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = F[0:4]
S['sem_sis'] = to_sis_sem(F[0:4])
S['sem_sis'] = short_to_sis(F[0:4])
if not 'partofday' in S:
S['partofday'] = ''
str = "INSERT INTO schedule (sem,sem_sis,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc,partofday) VALUES (%s);\n" % \

@ -8,7 +8,7 @@ from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
#from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate
from canvas_secrets import postgres_database, postgres_password, postgres_port, postgres_user, postgres_host
@ -391,7 +391,7 @@ def courses_to_sched():
vals_cache = []
i = 0
for year in ['16','17','18','19','20','21','22','23','24']:
for year in ['16','17','18','19','20','21','22','23','24','25']:
for sem in ['sp','su','fa']:
term = f"{sem}{year}"
sis_code = f"20{year}{seasons2[sem]}"
@ -422,6 +422,7 @@ def courses_to_sched():
full_sis_code = sis_code+'-'+c['crn']
if full_sis_code in sis_to_sched:
print(c['cred'])
q = [sis_to_sched[full_sis_code][0][0], c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, int(c['cap']), int(c['act']), sis_code]
vals_cache.append( q ) # [ str(x) for x in q ] )
#print(f"{i}: {q}")
@ -467,6 +468,24 @@ def teacher_list(courseid):
cursor.execute(q)
return cursor.fetchall()
def everyone_teacher_role():
conn,cursor = db()
q = '''select distinct ON (u.name) u.name, u.id, p.sis_user_id, u.created_at, c.course_code from canvas.enrollments e
join canvas.users u on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.pseudonyms p on u.id=p.user_id
where e.type='TeacherEnrollment'
order by u.name;'''
cursor.execute(q)
return cursor.fetchall()
def iLearn_name_from_goo(goo):
goo = goo.upper()
conn,cursor = db()
q = f"select u.id, u.name, u.sortable_name, p.sis_user_id from canvas.pseudonyms p join canvas.users u on u.id=p.user_id where p.sis_user_id='{goo}';"
cursor.execute(q)
return cursor.fetchone()
if __name__ == "__main__":

@ -104,7 +104,7 @@ if __name__ == "__main__":
10:['Download new photos', downloadPhoto],
11:['Check for avatar',checkForAvatar],
25:['X-List 190 sections', xlist_cwe] , ###
28:['Check accessibility of a course', accessible_check] ,
28:['Check accessibility of a course', course_download] ,
29:['Switch enrollments of a shell to all teachers', switch_enrol] ,
35:['Enroll user to all active courses in a semester', enroll_accred],
36:['Fix an older course so it can be enrolled again, add accred', unrestrict_course],

@ -30,9 +30,18 @@ from path_dict import PathDict
outputfile = ''
csvwriter = ''
TERM = 184
# 289 2025 Fall
# 288 2025 Summer
# 287 2025 Spring
# 286 2025 Winter
# 184 2024 Fall
# 183 2024 Summer
# 181 2024 Spring
# 182 2024 Winter
TERM = 286 # fall = 287
TERM = 287
# TERM = 286 # fall = 287
def escape_commas(s):

File diff suppressed because it is too large
@ -3,6 +3,17 @@
# TODO students enrolled in fall 2020
# People with custom email
select u.name, u.last_logged_out, u.created_at, u.updated_at, cc.path from canvas.users u
join canvas.communication_channels cc on u.id=cc.user_id
where cc.path_type='email' and cc.path not like '%gavilan.edu'
order by u.last_logged_out;
# how many
select count(u.name) from canvas.users u join canvas.communication_channels cc on u.id=cc.user_id where cc.path_type='email' and cc.path not like '%gavilan.edu';
## Fall 2020 students with how many classes they're taking
SELECT u.canvasid, u.name, u.sortablename, COUNT(e.id) AS num FROM enrollment AS e

File diff suppressed because it is too large
556
search.py Normal file
@ -0,0 +1,556 @@
###
###
### Text / Knowledge Base
###
### How about downloading all possible info / webpages / sources
### related to Gavilan and creating a master search index?
###
### Goals:
### - Scripted approach to allow re-indexing / updating
### - Break everything down into paragraphs
###
### - Script to extract keywords, topics, entities, summaries, questions answered
### from each paragraph or chunk.
### - Use spacy, gensim, nltk, or gpt-3, or a combination of all of them
###
### - Create vector / embeddings for each paragraph
###
### - Enable a vector search engine and connect to front page of gavilan.cc
### - Use that to feed handful of source paragraphs (& prompt) into gpt and
### receive text answers to questions.
import re, os, codecs, requests, trafilatura, pickle, pypandoc
from collections import defaultdict
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer, util
from util import clean_fn
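### Minimal sketch (illustrative only, not called anywhere) of the embed-and-query
### pipeline described in the notes above, using the same sentence-transformers +
### Annoy stack that create_embeddings() / search_embeddings() use further down.
### The paragraphs and the query string are placeholder text.
def embedding_pipeline_sketch():
    from sentence_transformers import SentenceTransformer
    from annoy import AnnoyIndex
    model = SentenceTransformer('all-MiniLM-L6-v2')        # 384-dimensional embeddings
    paragraphs = ["Financial aid help is available in person.",
                  "Peer mentors can assist with the FAFSA."]
    index = AnnoyIndex(384, 'angular')
    for i, vec in enumerate(model.encode(paragraphs)):     # one vector per paragraph
        index.add_item(i, vec)
    index.build(10)                                         # 10 trees
    query_vec = model.encode("Who can help me with the FAFSA?")
    for hit in index.get_nns_by_vector(query_vec, 2):       # nearest paragraphs first
        print(hit, paragraphs[hit])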
def demo_vector_search():
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
import nltk.data
import spacy
# (might have to upgrade pip first...)
# pip install --upgrade click
#
# python -m spacy download en_core_web_sm
# python -m spacy download en_core_web_lg
def is_complete_sentence(text):
#text = text.text
doc = nlp(text)
sentences = list(doc.sents)
if len(sentences) == 1 and text.strip() == sentences[0].text.strip():
return True
return False
sentences = [
"This is an example sentence.",
"Here is another sentence for training."
]
paragraph = """Financial Aid services are available in person! We are happy to assist you with your financial aid needs. If you are interested in visiting the office in person, please review the guidelines for visiting campus and schedule your appointment:
Guidelines for In-Person Financial Aid Services
Due to FERPA regulations, no student information will be given to anyone other than the student without authorization from the student.
We continue to offer virtual services. Financial Aid staff may be reached by email, phone, text, and zoom! Please refer to the contact information and schedules below.
Gavilan-WelcomeCenter_Peer_Mentors.jpg
Do you need assistance filing the FAFSA or California Dream Act Application? Friendly and knowledgeable Peer Mentors are available to assist you virtually and in person! Details below for an online Zoom visit, phone call, or in-person visit with Peer Mentors.
Monday - Friday 8am - 5pm, Student Center
Join Zoom to Connect with a Peer Mentor
Or call (669) 900-6833 and use meeting ID 408 848 4800
MicrosoftTeams-image.png
Do you need assistance with an existing financial aid application, financial aid document submission, or review of your financial aid package? Schedule an in-person, phone, or zoom appointment with our Financial Aid counter.
Mon - Thurs: 9am - 1:00pm, 2:00pm - 5:00pm
Fri: 10am - 2pm
Office: (408) 848-4727 Email: finaid@gavilan.edu
Schedule an In-Person, Phone or Zoom Appointment"""
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
sentences1 = tokenizer.tokenize(paragraph)
for i,s in enumerate(sentences1):
print(i, "\t", s)
print("\n\n")
#nlp = spacy.load('en_core_web_sm')
nlp = spacy.load('en_core_web_md')
doc = nlp(paragraph)
sentences2 = list(doc.sents)
for i,s in enumerate(sentences2):
t = re.sub(r'\n+',' ',s.text)
is_sentence = 'yes' if is_complete_sentence(t) else 'no '
print(i, " ", is_sentence, " ", t)
print("\n\n")
#for text in sentences2:
# print(text, "is a complete sentence?" , is_complete_sentence(text))
return
tokenized_sentences = [simple_preprocess(s) for s in sentences]
model = Word2Vec(tokenized_sentences, min_count=1, vector_size=100)
example_word = "example"
vector = model.wv[example_word]
print(f"Vector for the word '{example_word}': {vector}")
def makedir():
files = os.listdir('cache/crawl')
#print(files)
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
print(parts)
def manual_index():
files = os.listdir('cache/crawl')
#print(files)
ii = codecs.open('cache/crawl/index.html','w','utf-8')
ii.write('<html><body><h1>Site index</h1>\n')
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
ii.write('<br /><a href="mirror/'+f+'">'+f+'</a>\n')
def my_site():
files = os.listdir('cache/crawl')
output = []
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
output.append(parts)
return output
## TODO site scraper
## TODO find package that extracts text from web page
### TODO master list of what to index.
## TODO PDFs and DOCXs
## TODO fix urls w/ anchors
def crawl():
import scrapy, logging
from scrapy.crawler import CrawlerProcess
logger = logging.getLogger()
logger.setLevel(level=logging.CRITICAL)
logging.basicConfig(level=logging.CRITICAL)
logger.disabled = True
avoid = ['ezproxy','community\.gavilan\.edu','archive\/tag','archive\/category', 'my\.gavilan\.edu', 'augusoft',
'eis-prod', 'ilearn\.gavilan', 'mailto', 'cgi-bin', 'edu\/old\/schedule',
'admit\/search\.php', 'GavilanTrusteeAreaMaps2022\.pdf', 'schedule\/2019', 'schedule\/2020', 'schedule\/2021',
'schedule\/2022', 'schedule\/previous', ]
class MySpider(scrapy.Spider):
name = 'myspider'
#start_urls = ['https://gavilan.curriqunet.com/catalog/iq/1826']
start_urls = ['https://www.gavilan.edu']
"""
logging.getLogger("scrapy").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.utils.log").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.extensions.telnet").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.core.engine").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logger.disabled = True"""
def parse(self, response):
print('visited:', repr(response.url), 'status:', response.status)
done = 0
if re.search(r'\.pdf$', response.url):
m = re.search(r'\/([^\/]+\.pdf)$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
text = extract_text(save_folder + '/' + clean_fn(response.url))
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(text)
done = 1
for ext in ['doc','docx','ppt','pptx','rtf','xls','xlsx']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
#text = extract_text(save_folder + '/' + clean_fn(response.url) + '.txt')
pandoc_infile = save_folder + '/' + clean_fn(response.url)
pandoc_outfile = save_folder + '/' + clean_fn(response.url) + '.html'
print("pandoc in file: %s" % pandoc_infile)
print("pandoc outfile: %s" % pandoc_outfile)
pypandoc.convert_file(pandoc_infile, 'html', outputfile=pandoc_outfile, extra_args=['--from=%s' % ext, '--extract-media=%s' % save_folder + '/img' ])
pandoc_output = codecs.open(pandoc_outfile,'r','utf-8').read()
txt_output = trafilatura.extract(pandoc_output,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if txt_output:
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(txt_output)
done = 1
for ext in ['jpg','jpeg','gif','webp','png','svg','bmp','tiff','tif','ico']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/img/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/img/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
done = 1
if not done:
f_out = codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8')
this_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if this_output:
f_out.write(this_output)
f_out.close()
links = response.css('a::attr(href)').getall()
# Follow each link and parse its contents
for link in links:
go = 1
full_link = response.urljoin(link)
print('++++++ trying ', full_link)
if not re.search(r'gavilan\.edu',full_link):
go = 0
print('--- not gav edu')
else:
if re.search(r'hhh\.gavilan\.edu',full_link):
pass
elif not re.search(r'^https?:\/\/www\.gavilan\.edu',full_link):
# need to add www to gavilan.edu
m = re.search(r'^(https?:\/\/)gavilan\.edu(\/.*)$',full_link)
if m:
full_link = m.group(1) + 'www.gavilan.edu' + m.group(2)  # re-insert the host, which the regex does not capture
for a in avoid:
if re.search(a,full_link):
go = 0
print('--- avoid ', a)
if go: yield scrapy.Request(full_link, callback=self.parse,
headers={"User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"})
else:
print("------ avoiding ", full_link)
# Instantiate a CrawlerProcess object
process = CrawlerProcess()
# Add the MySpider spider to the process
process.crawl(MySpider)
# Start the process
logging.basicConfig(level=logging.CRITICAL)
logging.getLogger('scrapy').propagate = False
logging.getLogger("trafilatura").setLevel(logging.CRITICAL)
logging.getLogger("trafilatura").propagate = False
logging.getLogger("pdfminer").setLevel(logging.CRITICAL)
logging.getLogger("pdfminer").propagate = False
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").propagate = False
logging.basicConfig(level=logging.CRITICAL)
process.start()
save_folder = 'cache/crawl'
clean_folder = 'cache/cleancrawl'
def txt_clean_index():
files = os.listdir(save_folder)
line_freq = defaultdict(int)
# first pass
for f in files:
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
for L in lines:
L = L.strip()
line_freq[L] += 1
# second pass
for f in files:
print("\n\n",f)
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
out = codecs.open(clean_folder + '/' + f,'w','utf-8')
for L in lines:
L = L.strip()
if L in line_freq and line_freq[L] > 3:
continue
print(L)
out.write(L + '\n')
out.close()
from whoosh import fields, columns
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID, STORED, NUMERIC
from whoosh.qparser import QueryParser
from whoosh.analysis import StemmingAnalyzer
def priority_from_url(url):
priority = 1
# url is like this: https++www.gavilan.edu+news+Newsletters.php.txt
m = re.search(r'gavilan\.edu\+(.*)\.\w\w\w\w?$',url)
if m:
address = m.group(1)
parts = address.split('+')
if parts[0] in ['accreditation','curriculum','senate','research','old','committee','board','styleguide']:
priority += 20
if parts[0] in ['news','IT','HOM','administration']:
priority += 10
if parts[0] == 'admit' and parts[1] == 'schedule':
priority += 10
if 'accreditation' in parts:
priority += 50
if re.search(r'hhh\.gavilan\.edu',url):
priority += 100
priority *= len(parts)
#print(priority, parts)
else:
priority *= 50
#print(priority, url)
return priority
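# Worked example: the crawl file 'https++www.gavilan.edu+news+Newsletters.php.txt'
# yields address 'news+Newsletters.php'; parts[0] == 'news' adds 10 (total 11),
# which is then multiplied by len(parts) == 2, so the function returns 22.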
def test_priority():
ff = os.listdir('cache/crawl')
for f in ff:
priority_from_url(f)
def displayfile(f,aslist=0):
lines = codecs.open('cache/crawl/' + f,'r','utf-8').readlines()
lines = [L.strip() for L in lines]
lines = [L for L in lines if L and not re.search(r'^\|$',L)]
if aslist:
return lines
return "\n".join(lines)
def any_match(line, words):
# true if any of the words are in line
for w in words:
if re.search(w, line, re.IGNORECASE):
return True
return False
def find_match_line(filename, query):
q_words = query.split(" ")
lines = codecs.open('cache/crawl/' + filename,'r','utf-8').readlines()
lines = [L.strip() for L in lines]
lines = [L for L in lines if L and not re.search(r'^\|$',L)]
lines = [L for L in lines if any_match(L, q_words)]
return "\n".join(lines)
def search_index():
s = ''
schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i")))
ix = open_dir("cache/searchindex")
#with ix.reader() as reader:
#print(reader.doc_count()) # number of documents in the index
#print(reader.doc_frequency("content", "example")) # number of documents that contain the term "example" in the "content" field
#print(reader.field_length("content")) # total number of terms in the "content" field
#print(reader.term_info("content", "example")) # information about the term "example" in the "content" field
#print(reader.dump()) # overview of the entire index
while s != 'q':
s = input("search or 'q' to quit: ")
if s == 'q':
return
# Define the query parser for the index
with ix.searcher() as searcher:
query_parser = QueryParser("content", schema=schema)
# Parse the user's query
query = query_parser.parse(s)
print(query)
# Search the index for documents matching the query
results = searcher.search(query, sortedby="priority")
# Print the results
i = 1
for result in results:
print(i, result) # result["url"], result["content"])
print(find_match_line(result['url'], s))
print()
i += 1
def create_search_index():
# Define the schema for the index
stem_ana = StemmingAnalyzer()
schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i")))
# Create a new index in the directory "myindex"
ix = create_in("cache/searchindex", schema)
# Open an existing index
#ix = open_dir("cache/searchindex")
# Define the writer for the index
writer = ix.writer()
# Index some documents
files = os.listdir('cache/crawl')
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
print(f)
writer.add_document(url=f, title=m.group(1), content=displayfile(f), priority=priority_from_url(f))
writer.commit()
from annoy import AnnoyIndex
import random
def test_embed():
model = SentenceTransformer('all-MiniLM-L6-v2')
sample = "What is this world coming to? What happens in the data and the research?"
embed = model.encode(sample)
print("\nSample sentence:", sample)
print("\nEmbedding:", embed)
print("\nEmbedding size:", len(embed))
def create_embeddings():
model = SentenceTransformer('all-MiniLM-L6-v2')
vecsize = 384 # sentence transformer embedding size
t = AnnoyIndex(vecsize, 'angular')
files = os.listdir('cache/crawl')
output = [] # ['index', 'file','sentence']
index = 0
save_embeds = []
files.sort()
for f in files:
print(f)
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
lines = displayfile(f,1)
embeddings = model.encode(lines)
print("\n-----", index, f)
for sentence, embedding in zip(lines, embeddings):
if len(sentence.split(' ')) > 5:
print(index, "Sentence:", sentence)
print(embedding[:8])
t.add_item(index, embedding)
output.append( [index,f,sentence] )
index += 1
if index > 500:
break
t.build(30) # 30 trees
t.save('cache/sentences.ann')
pickle.dump( output, open( "cache/embedding_index.p", "wb" ) )
def search_embeddings():
f = 384 # sentence transformer embedding size
n = 10 # how many results
u = AnnoyIndex(f, 'angular')
u.load('cache/sentences.ann') # super fast, will just mmap the file
print(u.get_n_items(), "items in index")
model = SentenceTransformer('all-MiniLM-L6-v2')
search_index = pickle.load( open( "cache/embedding_index.p", "rb" ) )
print(search_index)
s = ''
while s != 'q':
s = input("search or 'q' to quit: ")
if s == 'q':
return
query_embedding = model.encode(s)
results = u.get_nns_by_vector(query_embedding, n)
# Print the top 5 results
for i, r in enumerate(results):
print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')
if __name__ == "__main__":
print ('')
options = { 1: ['demo vector search', demo_vector_search],
8: ['crawl',crawl],
9: ['clean text index', txt_clean_index],
10: ['make web dir struct', manual_index],
11: ['create search embeddings', create_embeddings],
12: ['create search index', create_search_index],
13: ['do an index search', search_index],
14: ['do a vector search', search_embeddings],
15: ['test priority', test_priority],
16: ['test embed', test_embed]
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

@ -1,11 +1,105 @@
# Try to gather all the different formats and ways of labeling a semester, along with their associated dates.
import json, funcy
import json, funcy, re, sys
sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
# sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
season_to_number = { 'Fall': '70', 'Summer': '50', 'Spring': '30', 'Winter': '10'}
# Inverse
number_to_season = {v: k for k, v in season_to_number.items()}
s_to_n = {'sp':'30','su':'50','fa':'70'}
season_to_short = {
'Summer': 'su',
'Fall': 'fa',
'Winter': 'wi',
'Spring': 'sp'
}
standard = ['Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
# Given 'fa22' return 202270
def short_to_sis(s):
season = s[0:2]
return "20" + s[2:5] + s_to_n[season]
# go from sp20 to 2020spring
def short_to_long(s):
parts = re.search(r'(\w\w)(\d\d)', s)
yr = parts.group(2)
season = parts.group(1)
seasons = {'sp':'spring','su':'summer','fa':'fall','wi':'winter'}
return '20'+yr+seasons[season]
# from "Summer 2024" to 202450
def human_to_sis(semester):
try:
# Split the semester into its components
parts = semester.split()
# Extract the season and year
season = parts[0]
year = parts[1]
# Generate the code in the format "YYYYSS"
return f"{year}{season_to_number[season]}"
except Exception as e:
print(f"Couldn't parse semester: {semester}")
return ''
# from 202450 to "Summer 2024"
def sis_to_human(sis_code):
try:
# Extract the year and season code
year = sis_code[:4]
season_code = sis_code[4:]
# Convert season code back to season name
season = number_to_season.get(season_code, "Unknown")
return f"{season} {year}"
except Exception as e:
print(f"Couldn't parse SIS code: {sis_code}")
return ''
# from "Summer 2024" to su24
def human_to_short(semester):
# Split the semester into its components
parts = semester.split()
# Extract the season and year
season = parts[0]
year = parts[1][2:] # Take the last two digits of the year
# Generate the short form
return f"{season_to_short[season]}{year}"
# given human readable form (Spring 2023) return that of the previous semester. Excluding winter.
def get_previous_season(season_year_str):
season_order = {"Spring": "Fall", "Summer": "Spring", "Fall": "Summer"}
try:
season, year = season_year_str.split()
year = int(year)
if season not in season_order or not (2000 <= year <= 2030):
raise ValueError("Invalid season or year")
previous_season = season_order[season]
# Decrement the year if transitioning from Spring to Fall
if season == "Spring":
year -= 1
return f"{previous_season} {year}"
except Exception as e:
return f"Error: {e}"
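# Quick reference -- these values follow directly from the converters above:
#   short_to_sis('fa22')               -> '202270'
#   short_to_long('sp20')              -> '2020spring'
#   human_to_sis('Summer 2024')        -> '202450'
#   sis_to_human('202450')             -> 'Summer 2024'
#   human_to_short('Summer 2024')      -> 'su24'
#   get_previous_season('Spring 2023') -> 'Fall 2022'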
standard = ['Fall 2026', 'Summer 2026', 'Spring 2026', 'Winter 2026',
'Fall 2025', 'Summer 2025', 'Spring 2025', 'Winter 2025',
'Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
'Fall 2023', 'Summer 2023', 'Spring 2023', 'Winter 2023',
'Fall 2022', 'Summer 2022', 'Spring 2022', 'Winter 2022',
'Fall 2021', 'Summer 2021', 'Spring 2021',
@ -14,9 +108,11 @@ standard = ['Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
'Fall 2018', 'Summer 2018', 'Spring 2018',
'Fall 2017', 'Summer 2017', 'Spring 2017', ]
code = 'fa24,su24,sp24,wi24,fa23,su23,sp23,wi23,fa22,su22,sp22,wi22,fa21,su21,sp21,fa20,su20,sp20,wi20,fa19,su19,sp19,wi19,fa18,su18,sp18,fa17,su17,sp17'.split(',')
code = 'fa26,su26,sp26,wi26,fa25,su25,sp25,wi25,fa24,su24,sp24,wi24,fa23,su23,sp23,wi23,fa22,su22,sp22,wi22,fa21,su21,sp21,fa20,su20,sp20,wi20,fa19,su19,sp19,wi19,fa18,su18,sp18,fa17,su17,sp17'.split(',')
begin = ['08/26','06/10','01/29','01/02',
begin = ['08/25','05/22','01/26','01/01', # not sure on fa26
'08/25','05/27','01/27','01/02',
'08/26','06/10','01/29','01/02',
'08/28','06/12','01/30','01/03',
'08/22','06/13','01/31','01/04',
'08/23','06/14','02/01',
@ -27,32 +123,66 @@ begin = ['08/26','06/10','01/29','01/02',
canvas_label = []
semester_list = {}
season_to_number = { 'Fall': '70', 'Summer': '50', 'Spring': '30', 'Winter': '10'}
s_to_n = {'sp':'30','su':'50','fa':'70'}
sems_by_human_name = {}
for s in list(zip(standard,code,begin)):
season,year = s[0].split(' ')
cl = year + " " + season
sem_record = {'name': s[0], 'code': s[1], 'start': s[2] + '/' + s[1][-2:], 'number': year + season_to_number[s[0].split(' ')[0]]}
semester_list[s[0]] = sem_record
semester_list[s[1]] = sem_record
sems_by_human_name[s[0]] = sem_record
sems_by_human_name[s[1]] = sem_record
canvas_label.append(cl)
semester_list[cl] = sem_record
sems_by_human_name[cl] = sem_record
sems_by_short_name = funcy.project(sems_by_human_name, code)
# Given 'fa22' return 202270
def to_sis_sem(s):
season = s[0:2]
return "20" + s[2:5] + s_to_n[season]
def dump():
print("BY HUMAN READABLE NAME")
print(json.dumps(sems_by_human_name,indent=2))
print("\n\nBY SEM SHORTCODE")
print(json.dumps(sems_by_short_name,indent=2))
# print(json.dumps(semester_list,indent=2))
sems = funcy.project(semester_list, code)
#print(json.dumps(sems,indent=2))
def weeks_from_date():
from datetime import datetime, timedelta
weeks = int( input("how many weeks ahead? "))
# Replace this with your starting date
x = datetime.strptime("2025-05-27", "%Y-%m-%d")
# Add the requested number of weeks
later = x + timedelta(weeks=weeks)
print(f"{weeks} weeks later:", later.strftime("%Y-%m-%d"))
if __name__ == "__main__":
print ('')
options = { 1: ['print semester info',dump] ,
2: ['compute x weeks from date', weeks_from_date ],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()
"""

590
ssb.py Normal file
@ -0,0 +1,590 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from datetime import datetime
import time, codecs, traceback
from bs4 import BeautifulSoup as bs
from io import StringIO
from time import strptime
from deepdiff import DeepDiff
from datetime import datetime as dt
from dateutil import parser
from util import fix_t_name, split_class_dept, split_class_code, split_class_code_letter
import json, re, sys, os, codecs, csv, pathlib
import schedules
def writepage(txt):
errfile = codecs.open('lastpage.txt','w','utf-8')
errfile.write(txt)
errfile.close()
DEBUG = 0
def d(s,end=''):
global DEBUG
if end and DEBUG: print(s,end=end)
elif DEBUG: print(s)
# Schedule / course filling history
# csv columns: timestamp, crn, code, teacher, cap, act, wl_cap, wl_act
# Log the history of enrollments per course during registration
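# Example row (made-up numbers) in the column order above:
# 2025-05-09T08-30,30123,MATH 233,Maria Garcia,30,28,5,2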
def log_section_filling(current_sched_list, short_sem):
rows = 'timestamp crn code teacher cap act wl_cap wl_act'.split(' ')
rows_j = 'crn code teacher cap act wl_cap wl_act'.split(' ')
print(rows_j)
now = datetime.now().strftime('%Y-%m-%dT%H-%M')
csv_fn = 'cache/reg_history_' + short_sem + '.csv'
with codecs.open(csv_fn,'a','utf-8') as f:
writer = csv.writer(f)
for S in current_sched_list:
#print(S)
items = [now,]
[ items.append( S[X] ) for X in rows_j ]
writer.writerow(items)
# Same as above, but compressed, act only
def log_section_filling2(current_sched_list, short_sem):
now = datetime.now().strftime('%Y-%m-%dT%H')
todays_data = { int(S['crn']): S['act'] for S in current_sched_list }
#print(todays_data)
todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
todays_df = todays_df.rename_axis('crn')
#print(todays_df)
todays_df.to_csv('cache/reg_today_new.csv', index=True)
try:
myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
print(myframe)
except:
fff = open('cache/reg_data_'+short_sem+'.csv','w')
fff.write('crn\n')
fff.close()
myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
#myframe = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
#myframe = myframe.rename_axis('crn')
print("Creating new data file for this semester.")
new_df = myframe.join( todays_df, on='crn', how='outer' )
new_df = new_df.rename_axis('crn')
print(new_df)
reg_data_filename = 'reg_data_' + short_sem + '.csv'
new_df.to_csv('cache/' + reg_data_filename, index=False)
# Take Banner's schedule HTML and return the section table as CSV-formatted text
def ssb_to_csv(src):
#out = codecs.open(schedfile,'w','utf-8')
output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
b = bs(src, 'html.parser')
tab = b.find(class_="datadisplaytable")
if not tab:
print("hmm... didn't find a 'datadisplaytable' in this html: ")
#print(src)
return 0
rows = tab.find_all('tr')
drows = list(filter(row_has_data,rows))
for dd in drows:
t = row_text(dd)
output += t
return output
# take text lines and condense them to one dict per section
def to_section_list(input_text,verbose=0):
this_course = ''
#todo: no output files
#jout = codecs.open(filename, 'w', 'utf-8')
#input = csv.DictReader(open(schedfile,'r'))
#input = UnicodeDictReader(input_text.splitlines())
all_courses = []
try:
f = StringIO(input_text)
except:
print("ERROR with this input_text:")
print(input_text)
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
#pdb.set_trace()
# clean funny unicode char in blank entries
r = {k: clean_funny2(v) for k,v in list(d.items()) }
if verbose: print("Cleaned: " + str(r))
if 'time' in r:
if r['time']=='TBA': r['time'] = ''
if r['time']: r['partofday'] = time_to_partofday(r['time'])
r['type'] = ''
if 'loc' in r:
if r['loc'] == 'ONLINE': r['type'] = 'online'
if r['loc'] == 'ONLINE' and r['time']: r['type'] = 'online live'
if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
if r['loc']: r['site'] = room_to_site(r['loc'],verbose)
if 'code' in r:
if re.search(r'ONLINE\sLIVE',r['code']):
r['type'] = 'online live'
elif re.search(r'ONLINE',r['code']):
r['type'] = 'online'
# does it have a section? it is the last course
if r['crn']: # is a new course or a continuation?
if verbose: print(" it's a new section.")
if this_course:
if not this_course['extra']: this_course.pop('extra',None)
all_courses.append(this_course)
this_course = r
#print(r['name'])
this_course['extra'] = []
else:
# is a continuation line
if verbose: print(" additional meeting: " + str(r))
for k,v in list(r.items()):
if not v: r.pop(k,None)
# TODO: if extra line is different type?
#if this_course['type']=='online' and r['type'] != 'online': this_course['type'] = 'hybrid'
#elif this_course['type']!='online' and r['type'] == 'online': this_course['type'] = 'hybrid'
this_course['extra'].append(r)
return all_courses
##
## SCHEDULE PARSE HELPERS
##
##
def time_to_partofday(t):
#todo: account for multiple sites/rows
# 11:20 am-12:10 pm
mor = strptime('12:00 PM', '%I:%M %p')
mid = strptime( '2:00 PM', '%I:%M %p')
aft = strptime( '6:00 PM', '%I:%M %p')
if t == 'TBA':
return 'TBA'
t = t.upper()
parts = t.split('-')
try:
begin = strptime(parts[0], '%I:%M %p')
end = strptime(parts[1], '%I:%M %p')
if end > aft:
return "Evening"
if end > mid:
return "Afternoon"
if end > mor:
return "Midday"
return "Morning"
#return begin,end
except Exception as e:
#print 'problem parsing: ', t, " ",
return ""
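# e.g. time_to_partofday('11:20 am-12:10 pm') returns 'Midday' (ends after noon
# but by 2:00 PM); 'TBA' returns 'TBA' and unparseable strings return ''.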
# Deduce a 'site' field, based on room name and known offsite locations
def room_to_site(room,verbose=0):
#todo: account for multiple sites/rows
#todo: better way to store these offsite labels
othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT,'.split(',')
# is it gilroy, mh, hol, other, online or hybrid?
site = 'Gilroy'
#if len(course[0]) > 13:
# room = course[0][13]
if room in othersites:
site = "Other"
if room == 'TBA':
site = 'TBA'
if room == 'AV':
site = 'San Martin Airport'
if re.search('MHG',room):
site = 'Morgan Hill'
if re.search('HOL',room):
site = 'Hollister'
if re.search('COY',room):
site = 'Coyote Valley'
if re.search('OFFSTE',room):
site = 'Other'
if re.search('ONLINE',room):
site = 'Online'
if verbose: print(room, '\t', end=' ')
return site
def row_has_data(r): # helper
if r.find_all('th'):
return False
if len(r.find_all('td')) > 2:
return True
if re.search('Note\:', r.get_text()):
return True
return False
def row_text(r): # helper
#global dbg
d("Row Txt Fxn gets: ")
arr = []
for t in r.find_all('td'):
if t.contents and len(t.contents) and t.contents[0].name == 'img':
arr.append("1")
d("img")
r_text = t.get_text()
arr.append(r_text)
if 'colspan' in t.attrs and t['colspan']=='2':
d('[colspan2]')
arr.append('')
d("\t"+r_text, end=" ")
d('')
if len(arr)==1 and re.search('Note\:',arr[0]):
note_line = clean_funny( arr[0] )
note_line = re.sub(r'\n',' ', note_line)
note_line = re.sub(r'"','', note_line)
#note_line = re.sub(r',','\,', note_line)
return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
del arr[0]
arr[1] = clean_funny(arr[1])
arr[2] = clean_funny(arr[2])
if arr[1]: arr[1] = arr[1] + " " + arr[2]
del arr[2]
arr = [ re.sub(r'&nbsp;','',a) for a in arr]
arr = [ re.sub(',','. ',a) for a in arr]
arr = [ re.sub('\(P\)','',a) for a in arr]
arr = [ a.strip() for a in arr]
#del arr[-1]
r = ','.join(arr)+'\n'
r = re.sub('\n','',r)
r = re.sub('add to worksheet','',r)
d("Row Txt Fxn returns: " + r + "\n\n")
return r + '\n'
def clean_funny(str):
if str and str == '\xa0': return ''  # compare to the non-breaking space directly; the old bytes-vs-str comparison was always False in Python 3
return str
def clean_funny2(str):
if str and str == '\xa0': return ''
if str and str == ' ': return ''
return str
def clean_funny3(str):
return re.sub('\xa0','',str)
def scrape_schedule(short_sem, semester_label):
# Set up Chrome options
chrome_options = Options()
#chrome_options.add_argument("--headless") # Run headless
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
# Start WebDriver
driver = webdriver.Chrome(options=chrome_options)
URL = "https://ssb-prod.ec.gavilan.edu/PROD/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"
GOO = "G00102586"
GOO_PIN = "987654bb"
filename = f"{short_sem}_sched.json"
filename_html = f"{short_sem}_sched.html"
try:
# Open page
driver.get(URL)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.ID,"UserID").clear()
driver.find_element(By.ID,"UserID").send_keys(GOO)
driver.find_element(By.NAME,"PIN").send_keys(GOO_PIN)
driver.find_element(By.NAME,"loginform").submit()
print('login')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Student").click()
print('students')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Registration").click()
print('registration')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Search for Classes").click()
print('search for classes')
driver.implicitly_wait(15)
writepage(driver.page_source)
print(driver.title)
dd = Select(driver.find_element(By.NAME,"p_term"))
if (dd):
dd.select_by_visible_text(semester_label)
driver.find_element(By.XPATH,"/html/body/div/div[4]/form").submit()
print('semester')
driver.implicitly_wait(15)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.XPATH,"/html/body/div/div[4]/form/input[18]").click()
print('advanced?')
driver.implicitly_wait(10)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.NAME,"SUB_BTN").click()
print('login')
driver.implicitly_wait(40)
time.sleep(15)
driver.implicitly_wait(40)
writepage(driver.page_source)
print(driver.title)
text = driver.page_source
codecs.open('cache/' + filename_html,'w', 'utf-8').write(text)
##
## Start parsing html
##
as_list = ssb_to_csv(text)
print(as_list)
as_dict = to_section_list(as_list)
jj = json.dumps(as_dict,indent=2)
##
## Diff from previous semester
##
try:
ps = codecs.open('cache/'+filename,'r','utf-8')
prev_sched = json.loads(ps.read())
ps.close()
if 1: # sometimes I want to re-run this without affecting the logs.
log_section_filling(as_dict, short_sem)
log_section_filling2(as_dict, short_sem)
dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
pretty_json = json.dumps( json.loads( dd.to_json() ), indent=2 )
codecs.open('cache/%s_sched_diff.json' % short_sem,'w','utf-8').write( pretty_json ) # dd.to_json() )
# Next, rename the prev sched_xxYY.json data file to have its date,
# make this new one, and then upload it to the website.
# Maybe even count the entries and do a little sanity checking
#
# print("Last modified: %s" % time.ctime(os.path.getmtime("test.txt")))
# print("Created: %s" % time.ctime(os.path.getctime("test.txt")))
last_mod = time.ctime(os.path.getmtime('cache/' + filename))
prev_stat = pathlib.Path('cache/' + filename).stat()
mtime = dt.fromtimestamp(prev_stat.st_mtime)
print(mtime)
except Exception as e:
print("Couldn't Diff.")
print("Got an exception: ", e)
# fname = pathlib.Path('test.py')
# assert fname.exists(), f'No such file: {fname}' # check that the file exists
# print(fname.stat())
#
# os.stat_result(st_mode=33206, st_ino=5066549581564298, st_dev=573948050, st_nlink=1, st_uid=0, st_gid=0, st_size=413,
# st_atime=1523480272, st_mtime=1539787740, st_ctime=1523480272)
codecs.open(f'cache/{filename}', 'w', 'utf-8').write(jj)
return as_dict
except Exception as e:
print("Got an exception: ", e)
#print("There was an error: " + e.args[0] + ". The line where the code failed was " + str(traceback.extract_stack()))
finally:
driver.quit()
def expanded(as_dict, short_sem):
#as_dict = scrape_schedule()
course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = schedules.campus_dept_hierarchy()
expanded = list_latestarts(short_sem)
fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")
ffcsv = codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8')
with ffcsv as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(fields)
for S in expanded:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = course_to_gp[parts[0]]
S['dean'] = course_to_dean[parts[0]]
S['sem'] = short_sem
# S['act'] = S['cap']
if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
csvwriter.writerow( [ S[x] for x in fields ] )
#put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term):
show_summary = 1
the_year = '20' + term[2:4]
print("year: ", the_year, " semester: ", term)
#term_in = "cache/%s_sched.json" % term
term_out = "cache/%s_latestarts.txt" % term
expanded_out = "%s_sched_expanded.json" % term
print("Writing output to " + term_out)
#infile = codecs.open(term_in, "r", "utf-8")
outfile = codecs.open(term_out, "w", "utf-8")
exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
expanded = []
#sched = json.loads(infile.read())
#sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched.json").json()
sched = json.loads( codecs.open(f"cache/{term}_sched.json","r","utf-8").read() )
by_date = {}
if show_summary: print("course \t loc \t type \t time")
for C in sched:
if (not C['type']) and C['loc'] != 'ONLINE': # and C['time']:
C['type'] = 'in-person'
if show_summary: print("%s \t %s \t %s \t %s" % (C['code'],C['loc'],C['type'],C['time']))
if 'extra' in C:
if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
C['type'] = 'hybrid'
times = C['time'].split("-")
if len(times) > 1:
time_start = times[0]
time_end = times[1]
try:
startt = time.strptime(time_start,"%I:%M %p")
endt = time.strptime(time_end,"%I:%M %p")
min_start = startt.tm_min
min_end = endt.tm_min
if min_start == 0: min_start = "00"
else: min_start = str(min_start)
if min_end == 0: min_end = "00"
else: min_end = str(min_end)
C['time_start'] = "%i:%s" % (startt.tm_hour, min_start )
C['time_end'] = "%i:%s" % (endt.tm_hour, min_end )
if 0:
print("+ Parsed %s into %s and %s." % (C['time'], C['time_start'], C['time_end']))
except Exception as e:
print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
else:
C['time_start'] = ''
C['time_end'] = ''
if re.search('TBA',C['date']):
C['start'] = ''
C['end'] = ''
C['doy'] = ''
expanded.append(C)
continue
parts = C['date'].split("-")
start = parts[0] + "/" + the_year
end = parts[1] + "/" + the_year
try:
startd = parser.parse(start)
endd = parser.parse(end)
C['start'] = "%i-%i" % (startd.month,startd.day)
C['end'] = "%i-%i" % (endd.month,endd.day)
C['doy'] = startd.timetuple().tm_yday
expanded.append(C)
except Exception as e:
print(e, "\n-- problem parsing ", start, " or ", end)
if not startd in by_date:
by_date[startd] = []
by_date[startd].append(C)
exoutfile.write( json.dumps(expanded,indent=2) )
exoutfile.close()
#put_file('/home/public/schedule/', 'cache/', expanded_out, 0)
for X in sorted(by_date.keys()):
#print("Start: ", X)
if len(by_date[X]) < 200:
prettydate = X.strftime("%A, %B %d")
#print(prettydate + ": " + str(len(by_date[X])) + " courses")
outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
for Y in by_date[X]:
#print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
#print(Y)
#outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
outfile.close()
#put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
return expanded
# Get semesters to scrape
with open('cache/to_scrape.json', 'r') as f:
semesters = json.load(f)
# Loop through each item and call the function
for item in semesters:
as_dict = scrape_schedule(item['short_sem'], item['sem'])
ex = expanded(as_dict, item['short_sem'])
print(f"Done with {item['sem']}. Sleeping 45 seconds.")
time.sleep(45)

@ -675,7 +675,7 @@ def all_course_names():
mr.write(json.dumps(master_record,indent=2))
from semesters import semester_list, canvas_label
from semesters import sems_by_human_name, canvas_label
from semesters import code as semester_order
from localcache import all_students_history
from datetime import datetime, timedelta
@ -683,12 +683,12 @@ from datetime import datetime, timedelta
def semester_dates():
#print()
for c in canvas_label:
print(semester_list[c])
print(sems_by_human_name[c])
length = 15
if semester_list[c]['code'][0:2] == 'su':
if sems_by_human_name[c]['code'][0:2] == 'su':
length = 5
start_date = semester_list[c]['start']
start_date = sems_by_human_name[c]['start']
# Convert the date string to a datetime object
date_object = datetime.strptime(start_date, '%m/%d/%y')
start_fmt = date_object.strftime('%a %b %d, %Y')
@ -728,17 +728,17 @@ def course_line_process(line):
current_student_block.append(current_student_info)
normalized_blocks.append(current_student_block)
current_student_block = []
current_student_info = {'first':semester_list[sem]['code'], 'last':''}
current_student_info = {'first':sems_by_human_name[sem]['code'], 'last':''}
current_student = uid
#print(f"Student: {uid} ({line['user_name']})")
# line is a dict
current_student_info['last'] = semester_list[sem]['code']
current_student_info['last'] = sems_by_human_name[sem]['code']
year, season = m1.group(1), m1.group(2)
date_format = "%Y-%m-%d %H:%M:%S.%f"
create_dt = datetime.strptime(line['created'], date_format)
update_dt = datetime.strptime(line['updated'], date_format)
sem_start = datetime.strptime(semester_list[sem]['start'], '%m/%d/%y')
sem_start = datetime.strptime(sems_by_human_name[sem]['start'], '%m/%d/%y')
course = line['course_name']
c_parts = course.split(' ')
@ -763,7 +763,7 @@ def course_line_process(line):
sign = '+'
#print(f" {mark} {classname} added T{sign}{add_day} {semester_list[sem]['code']}")
temp_usr_name = re.sub(r',','',line['user_name'])
current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{sems_by_human_name[sem]['code']}")
if flow == "deleted":
# deleted, give delete date
del_day = sem_start - update_dt
@ -773,7 +773,7 @@ def course_line_process(line):
del_day = -del_day
sign = '+'
#print(f" {mark} {classname} deleted T{sign}{del_day} {semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{sems_by_human_name[sem]['code']}")
def normalize_course_histories():

@ -1,6 +1,6 @@
import json, codecs, requests, re, pdb, csv, textdistance, collections
import sys, csv, string, funcy, math, shutil, imghdr, os
import sys, csv, string, funcy, math, shutil, os
import pytz, time
import pandas as pd
import matplotlib.pyplot as plt
@ -8,12 +8,14 @@ import matplotlib.pyplot as plt
#from pandas import TimeGrouper
from PIL import Image
from collections import defaultdict
from pipelines import fetch, fetch_stream, getSemesterSchedule, header, url, FetchError, put_file
from pipelines import fetch, fetch_stream, header, url, FetchError, put_file
from schedules import get_semester_schedule
from courses import course_enrollment, users_in_semester
from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock
from localcache import teachers_courses_semester, course_mode, sem_schedule
from localcache2 import all_2x_sem_courses_teachers, all_sem_courses_teachers
from pipelines import dean, dean_names
from schedules import campus_dept_hierarchy
#from pipelines import dean, dean_names #TODO
from util import dept_from_name, most_common_item
from os.path import exists, getmtime
@ -234,7 +236,7 @@ def staff_dir(get_fresh=False):
#
def schedForTeacherOverview(long,short):
sem = getSemesterSchedule(short)
sem = get_semester_schedule(short)
sem['type'] = sem['type'].apply(classType)
#sem['code'] = sem[['code','type']].apply(' '.join,axis=1)
sem['sem'] = short
@ -1099,6 +1101,7 @@ def checkForAvatar(id=2):
# Grab em. Change the first if when continuing after problems....
def downloadPhoto():
import imghdr
pix_dir = 'cache/picsCanvas2022/'
# Update the list of all ilearn users?
i_last_ix = '-1'
@ -2203,17 +2206,28 @@ def cross_ref_training():
wb = load_workbook("C:/Users/phowell/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx")
print(wb.sheetnames)
# Fetch from Canvas DB. Make sure it's recently updated.
# Also relies on schedule being in database. Run localcache2.courses_to_sched()
courses = all_2x_sem_courses_teachers('202550', '202570') #
#courses = all_sem_courses_teachers('202470')
# report for email
report = codecs.open('cache/gott_report.txt','w','utf-8')
# update local list of teachers from ilearn?
RELOAD_TEACHERS = 0
ask = input('download new list of teachers? (y/n) ')
if ask.strip()=='y': RELOAD_TEACHERS = 1
if RELOAD_TEACHERS:
teacherRolesUpdateCache()
# TODO inefficient but just read it again
all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = campus_dept_hierarchy()
records = {}
sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed', 'SU21 Workshop', 'BOOT CAMP', 'GOTT ABC', 'TITLE V GOTT ACADEMY', 'Other Certifications']
for sname in sheets:
@ -2221,7 +2235,7 @@ def cross_ref_training():
sheet = wb[sname]
records[sname] = {}
for row in sheet.iter_rows():
if row[0].value == 'G Number': continue
if row[1].value == 'ID': continue
goo = row[1].value
rowvals = [str(v.value) for v in row]
records[sname][goo] = rowvals
@ -2233,17 +2247,39 @@ def cross_ref_training():
teachers_bydept = defaultdict(set)
alldepts = set()
# reconfigure to key on goo
by_goo = defaultdict(dict)
# courses = all_2x_sem_courses_teachers('202450', '202470') #
courses = all_sem_courses_teachers('202470')
for course,coursedict in records.items():
print(course)
for goo,record in coursedict.items():
if goo=='ID': continue
if record[0]=='Name': continue
print(f" {goo}")
try:
if record[4]=="None":
by_goo[goo][course] = "ok"
else:
by_goo[goo][course] = record[4] # record the end date
except:
print(f" -- problem with this record {json.dumps(record)}")
by_goo[goo][course] = 'ok'
bg_file = codecs.open('cache/gott_by_goo.json','w','utf-8')
bg_file.write(json.dumps(by_goo,indent=2))
##
## Start looking at the courses to cross reference
##
for c in courses:
print(c)
try:
goo = c[6]
crn = c[8]
goo = c[8]
crn = c[4]
name = c[1] # full course name
teacher = c[4] # last, first
ctype = c[7]
teacher = c[6] # last, first
ctype = c[3]
dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1)
alldepts.add(dept1)
d = list(c)
@ -2267,16 +2303,16 @@ def cross_ref_training():
flagfont = PatternFill("solid", fgColor="00FFFFCC")
for thedean in ['et','nl','ss','jn', 'de']:
sheet.cell(row=r, column=1).value = dean_names[thedean]
sheet.cell(row=r, column=1).value = dean_code_to_name[thedean]
sheet.cell(row=r, column=1).font = deptfont
r += 2
report.write(f"Dean: {dean_names[thedean]}\n")
report.write(f"Dean: {dean_code_to_name[thedean]}\n")
for D in alldepts:
if not D in dean:
if not D in course_to_dean:
print(f"MISSING DEAN for dept: {D}")
if dean[D] == thedean:
if course_to_dean[D] == thedean:
if len(teachers_bydept[D]) == 0: continue
print(f"\n------------\n{D}")
sheet.cell(row=r, column=1).value = D
@ -2289,8 +2325,8 @@ def cross_ref_training():
waived = 0
sects = teachers[t]
print(f"Sections for {t}: {sects}")
goo = sects[0][6]
course_mode = sects[0][7]
goo = sects[0][8]
course_mode = sects[0][3]
print(t)
sheet.cell(row=r, column=1).value = f"{t}"
sheet.cell(row=r, column=2).value = f"{goo}"
@ -2521,6 +2557,21 @@ def cross_ref_training_withcsv():
print(f" {s[8]} {s[2]}")
print()
def get_portfolios(id=0):
if not id:
id = int( input( "what user id? "))
p = fetch( f"{url}/api/v1/users/{id}/eportfolios" )
print(json.dumps(p, indent=2))
def get_port_pages(id=0):
if not id:
id = int( input("what portfolio id? "))
p = fetch(f"{url}/api/v1/eportfolios/{id}/pages")
print(json.dumps(p, indent=2))
@ -2552,6 +2603,10 @@ if __name__ == "__main__":
25: ['cross ref training', cross_ref_training],
26: ['find goo numbers in training spreadsheet', training_find_goos],
30: ['get portfolios for user id', get_portfolios],
31: ['get portfolio pages for portfolio id', get_port_pages],
#3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
#5: ['Match names in schedule & ilearn', match_usernames],
#6: ['Create Dept\'s ZTC list', create_ztc_list],

49
util.py
@ -13,6 +13,43 @@ import functools
from functools import reduce
# Teacher name format changed. Remove commas and switch first to last
def fix_t_name(str):
str = str.strip()
str = re.sub('\s+',' ',str)
parts = str.split(', ')
if len(parts)>1:
return parts[1].strip() + " " + parts[0].strip()
return str
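# e.g. fix_t_name(' Garcia,  Maria ') -> 'Maria Garcia'; a name with no comma is
# returned unchanged apart from whitespace cleanup.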
# Separate dept and code
def split_class_dept(c):
return c.split(' ')[0]
def split_class_code(c):
num = c.split(' ')[1]
parts = re.match('(\d+)([a-zA-Z]+)',num)
#ret = "Got %s, " % c
if parts:
r = int(parts.group(1))
#print(ret + "returning %i." % r)
return r
#print(ret + "returning %s." % num)
return int(num)
def split_class_code_letter(c):
num = c.split(' ')[1]
parts = re.match('(\d+)([A-Za-z]+)',num)
if parts:
return parts.group(2)
return ''
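# e.g. for c = 'MATH 233A': split_class_dept(c) -> 'MATH', split_class_code(c) -> 233,
# split_class_code_letter(c) -> 'A'; a plain 'MATH 233' gives 233 and '' respectively.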
def nowAsStr():
#Get the current time, printed in the right format
currentTime = datetime.datetime.utcnow()
prettyTime = currentTime.strftime('%a, %d %b %Y %H:%M:%S GMT')
return prettyTime
def contains_key_value(lst, x, y):
"""
Checks if a list contains a dictionary with a specific key-value pair.
@ -236,3 +273,15 @@ def partition(times_list):
dd.write(json.dumps(timeline_times))
return sessions
def clean_fn(s):
s = re.sub(r'[\s:]+','',s)
s = re.sub(r'\/','+',s)
return s
def format_html(html):
soup = bs(html, 'html.parser')
return soup.prettify()

182
video.py Normal file
@ -0,0 +1,182 @@
# Tools for detecting video embeds, swapping SRT subtitle files, etc
import codecs, re, requests, json, os, webbrowser
from bs4 import BeautifulSoup as bs
from util import minimal_string, stripper, mycleaner
from content import grab_course_pages
from pipelines import put_file
# Use template to build html page with homegrown subtitles
def build_srt_embed_php(data):
template = codecs.open('template_srt_and_video.txt','r','utf-8').readlines()
result = ''
for L in template:
L = re.sub('FRAMEID',data['frameid'],L)
L = re.sub('TITLE',data['title'],L)
L = re.sub('EMBEDLINK',data['embedlink'],L)
L = re.sub('SRTFOLDERFILE',data['srtfolderfile'],L)
result += L
return result
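# Hypothetical example of the dict this expects -- the keys match the placeholders
# substituted above; the values are made up:
# data = {'frameid': 'vid1',
#         'title': 'Week 1 Lecture',
#         'embedlink': 'https://www.youtube.com/embed/XXXXXXXXXXX',
#         'srtfolderfile': 'bio101/week1.srt'}
# html = build_srt_embed_php(data)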
def yt_title(code):
# Look up a YouTube video title by its code; results are cached in saved_titles
# and persisted to saved_youtube_titles.json so repeat lookups skip the request.
global saved_titles
if code in saved_titles:
return saved_titles[code]
resp = requests.get('https://www.youtube.com/watch?v=%s' % code)
soup = bs(resp.content, "lxml")
title = soup.find('title').text
title = re.sub(r'\s-\sYouTube', '', title)
saved_titles[code] = title
codecs.open('saved_youtube_titles.json', 'w', 'utf-8').write(json.dumps(saved_titles))
return title
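# Example (illustrative, hypothetical code): yt_title("abc123XYZ") fetches the watch page,
# strips the trailing " - YouTube" from its <title>, and caches the result for later calls.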
def swap_youtube_subtitles():
# example here: http://siloor.github.io/youtube.external.subtitle/examples/srt/
# srt folder, look at all filenames
srtlist = os.listdir('video_srt')
for i, V in enumerate(srtlist):
print(str(i) + '. ' + V)
choice = input("Which SRT folder? ")
choice = srtlist[int(choice)]
srt_folder = 'video_srt/'+choice
class_srt_folder = choice
srt_files = os.listdir(srt_folder)
srt_shorts = {}
print("\nThese are the subtitle files: " + str(srt_files))
for V in srt_files:
if V.endswith('srt'):
V1 = re.sub(r'(\.\w+$)','',V)
srt_shorts[V] = minimal_string(V1)
crs_id = input("What is the id of the course? ")
grab_course_pages(crs_id)
v1_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
v1_content = v1_pages.read()
# a temporary page of all youtube links
tp = codecs.open('page_revisions/links_' + str(crs_id) + '.html', 'w','utf-8')
# course pages, get them all and look for youtube embeds
title_shorts = {}
title_embedlink = {}
title_list = []
print("I'm looking for iframes and youtube links.")
for L in v1_content.split('\n'):
if re.search(r'<a.*?href="https://youtu', L):
print("Possibly there's a linked video instead of an embedded one: " + L)
if re.search('iframe',L):
ma = re.compile(r'(\w+)=(".*?")')
#print "\n"
this_title = ''
this_src = ''
for g in ma.findall(L):
print(g)
if g[0]=='title':
this_title = g[1].replace('"','')
if g[0]=='src':
this_src = g[1].replace('"','')
#print g
if not this_title:
tmp = re.search(r'embed\/(.*?)\?',this_src)
if not tmp: tmp = re.search(r'embed\/(.*?)$',this_src)
if tmp:
this_title = yt_title(tmp.groups()[0])
title_shorts[this_title] = minimal_string(this_title)
title_list.append(this_title)
title_embedlink[this_title] = this_src
print("%s\n" % this_title.encode('ascii','ignore'))
tp.write( "%s<br><a target='_blank' href='%s'>%s</a><br /><br />" % (this_title, this_src, this_src) )
# match them:
# lowercase, runs of non-alphanumeric chars become a single space, then try to match
# if any srts remain unmatched, ask the user
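# e.g. (illustrative, assuming minimal_string lowercases and collapses non-alphanumeric runs
# to a single space) the title "Lecture 3: Loops" and the file "Lecture_3-Loops.srt" both
# reduce to "lecture 3 loops", so they match automatically.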
tp.close()
webbrowser.open_new_tab('file://C:/SCRIPTS/everything-json/page_revisions/links_'+str(crs_id)+'.html')
matches = {} # key is Title, value is srt file
for S,v in list(srt_shorts.items()):
found_match = 0
print(v, end=' ')
for T, Tv in list(title_shorts.items()):
if v == Tv:
print(' \tMatches: ' + T, end=' ')
found_match = 1
matches[T] = S
break
#print "\n"
print("\nThese are the srt files: ")
print(json.dumps(srt_shorts,indent=2))
print("\nThese are the titles: ")
print(json.dumps(title_shorts,indent=2))
print("\nThese are the matches: ")
print(json.dumps(matches,indent=2))
print(("There are %d SRT files and %d VIDEOS found. " % ( len(list(srt_shorts.keys())), len(list(title_shorts.keys())) ) ))
for S,v in list(srt_shorts.items()):
if S not in matches.values():
print("\nDidn't find a match for: " + S)
i = 0
for T in title_list:
if T not in matches: print(str(i+1) + ". " + T.encode('ascii', 'ignore').decode('ascii'))
i += 1
print("Here's the first few lines of the SRT:")
print(( re.sub(r'\s+',' ', '\n'.join(open(srt_folder+"/"+S,'r').readlines()[0:10]))+"\n\n"))
choice = input("Which one should I match it to? (zero for no match) ")
if int(choice)>0:
matches[ title_list[ int(choice)-1 ] ] = S
print("SRT clean name was: %s, and TITLE clean name was: %s" % (v,title_shorts[title_list[ int(choice)-1 ]] ))
print("ok, here are the matches:")
print(json.dumps(matches,indent=2))
# construct subsidiary pages, upload them
i = 0
for m,v in list(matches.items()):
# open template
# do replacement
i += 1
data = {'frameid':'videoframe'+str(i), 'title':m, 'embedlink':title_embedlink[m], 'srtfolderfile':v }
print(json.dumps(data,indent=2))
file_part = v.split('.')[0]
new_php = codecs.open(srt_folder + '/' + file_part + '.php','w','utf-8')
new_php.write(build_srt_embed_php(data))
new_php.close()
#srt_files = os.listdir(srt_folder)
put_file(class_srt_folder)
def test_swap():
crs_id = '6923'
# swap in embed code and re-upload canvas pages
v2_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
v2_content = v2_pages.read()
ma = re.compile(r'(\w+)=(".*?")')
for L in v2_content.split('\n'):
find = re.findall(r'<iframe(.*?)>', L)
if find:
print("Found: ", find)
for each in find:
#print "\n"
this_title = ''
this_src = ''
for g in ma.findall(each):
#print g
if g[0]=='title':
this_title = g[1].replace('"','')
if g[0]=='src':
this_src = g[1].replace('"','')
#print g
if not this_title:
tmp = re.search(r'embed\/(.*?)\?',this_src)
if not tmp: tmp = re.search(r'embed\/(.*?)$',this_src)
if tmp:
this_title = yt_title(tmp.groups()[0])
print("Found embed link: %s\n and title: %s\n" % (this_src,this_title.encode('ascii','ignore')))