"""Print upcoming events from Google Calendar (gcal) and the local
Outlook calendar (ocal) as Markdown-style tables.

Reconstructed from a newline-collapsed diff; heavyweight / platform-specific
dependencies (googleapiclient, win32com, canvas_secrets) are imported lazily
inside the functions that use them so the datetime helpers work everywhere.
"""

import datetime
from datetime import timedelta
from zoneinfo import ZoneInfo

# Google calendar IDs this script knows how to query, keyed by a short name.
calendars = {
    'peter_main': 'peter.howell@gmail.com',
    'aly_and_peter': '5qgh1nv9g5on3am4vres9i451c@group.calendar.google.com',
    'tlc': '4aq36obt0q5jjr5p82p244qs7c@group.calendar.google.com',
    'birthdays': '4q73r3ern2k9k83t0orq6iqaac@group.calendar.google.com',
}


def _pretty(dt):
    """Format a datetime as e.g. 'Thursday, July 4 | 12:30pm'.

    Shared formatting used by to_my_timezone()/in_my_timezone():
    strips the leading zero from the day and the hour, and lower-cases
    the AM/PM marker.
    """
    # %A weekday name, %B month name, %d day (zero-padded),
    # %I 12-hour clock (zero-padded), %M minute, %p AM/PM.
    formatted = dt.strftime("%A, %B %d | %I:%M%p")
    # %d and %I are zero-padded; drop the pad that follows a space.
    formatted = formatted.replace(" 0", " ")
    return formatted.replace("AM", "am").replace("PM", "pm")


def to_my_timezone(d, md_table_format=0):
    """Parse ISO-8601 string *d* and render it in Pacific time.

    A naive input (no UTC offset) is interpreted as system-local time by
    astimezone(), matching the original behavior.  *md_table_format* is
    kept for interface compatibility but is unused.
    """
    dt = datetime.datetime.fromisoformat(d)
    return _pretty(dt.astimezone(ZoneInfo("America/Los_Angeles")))


def in_my_timezone(d, md_table_format=0):
    """Parse ISO-8601 string *d* and render it with NO timezone conversion.

    Used for Outlook items, whose Start values are already local times.
    *md_table_format* is kept for interface compatibility but is unused.
    """
    return _pretty(datetime.datetime.fromisoformat(d))


def gcal():
    """Print up to 30 upcoming events from every configured Google calendar."""
    # Imported here so the datetime helpers above work without these deps.
    from googleapiclient.discovery import build
    from canvas_secrets import GOOGLE_API_KEY

    service = build('calendar', 'v3', developerKey=GOOGLE_API_KEY)
    n = 30

    for name, cal_id in calendars.items():
        # Current time in RFC3339 / 'Z' form, as the API requires.
        # (timezone-aware replacement for the deprecated utcnow()).
        now = datetime.datetime.now(datetime.timezone.utc) \
                               .isoformat().replace('+00:00', 'Z')
        print(f'Getting the upcoming {n} events')

        events_result = service.events().list(
            calendarId=cal_id,
            timeMin=now,
            maxResults=n,
            singleEvents=True,
            orderBy='startTime'
        ).execute()
        events = events_result.get('items', [])

        if not events:
            print('No upcoming events found.')
            # Was 'return', which silently skipped the remaining calendars.
            continue

        print("| Date | Time | Event | Lead |")
        print("|------|------|-------|------|")
        for event in events:
            # All-day events carry 'date' instead of 'dateTime'.
            start = event['start'].get('dateTime', event['start'].get('date'))
            # to_my_timezone() embeds a '|' between date and time, so it
            # fills the first two columns; one trailing empty cell = Lead.
            # (Original printed two empty cells: 5 columns vs a 4-col header.)
            print(f"| {to_my_timezone(start, 1)} | {event.get('summary', 'No Title')} | |")


def ocal():
    """Print the next n days of appointments from the local Outlook calendar."""
    # win32com is Windows-only; import lazily so the rest of the module
    # remains importable on other platforms.
    import win32com.client

    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

    root_folder = outlook.Folders.Item(1)
    print(f"Root folder: {root_folder.Name}")

    # 9 == olFolderCalendar, the default Calendar folder.
    calendar_folder = outlook.GetDefaultFolder(9)
    calendar_items = calendar_folder.Items
    print("Total items in Calendar:", calendar_items.Count)

    # Time window: now through n days out.  (Comment in the original said
    # "next 7 days" but n was 14.)
    n = 14
    now = datetime.datetime.now()
    end = now + timedelta(days=n)

    # Outlook's Restrict() filter wants dates as "mm/dd/yyyy hh:mm" strings.
    filter_start = now.strftime("%m/%d/%Y %H:%M")
    filter_end = end.strftime("%m/%d/%Y %H:%M")
    restriction = f"[Start] >= '{filter_start}' AND [End] <= '{filter_end}'"

    # Expand recurring appointments and sort before restricting,
    # preserving the original call order.
    calendar_items.IncludeRecurrences = True
    calendar_items.Sort("[Start]")

    print(f"Calendar items in next {n} days:")
    for item in calendar_items.Restrict(restriction):
        # item.Start is a COM datetime; its str() form is ISO-parsable.
        start = in_my_timezone(str(item.Start), 1)
        print(f"{start} - {item.Subject}")


if __name__ == '__main__':
    ocal()
\n\n' DBG = 1 +items = [] + def d(s): global DBG if DBG: print(s) @@ -88,7 +89,7 @@ def test_forums(id=0): except: print("Course folder exists.") - index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) ) + index.extend( extract_forums(id, course_folder, item_id_to_index, verbose) ) print(json.dumps(index,indent=2)) def write_message(fd, view, participants): @@ -98,10 +99,13 @@ def write_message(fd, view, participants): write_message(fd, r, participants) fd.write("\n") -def extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose=0): +def extract_forums(id, course_folder, item_id_to_index, verbose=0): ### ### FORUMS ### + + global items + index = [] forum_f = course_folder + '/forums' headered = 0 @@ -136,7 +140,7 @@ def extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose=0 # write to running log of content in order of module if p['id'] in item_id_to_index: - items_inorder[ item_id_to_index[ p['id'] ] ] = f"

{title}

\n\n{message}\n\n{pagebreak}" + items[ item_id_to_index[ p['id'] ] ] = f"

{title}

\n\n{message}\n\n{pagebreak}" else: print(' This forum didnt seem to be in the modules list.') except Exception as e: @@ -151,14 +155,30 @@ def extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose=0 - +# +# +# +# +# +# todo: include front page. +# todo: clean html +# todo: toc +# +# # Download everything interesting in a course to a local folder # Build a master file with the entire class content -def accessible_check(id=""): +def course_download(id=""): + global items + if not id: id = input("ID of course to check? ") - verbose = 1 - PAGES_ONLY = 1 + # temp hard code + #id = "21284" + + verbose = 0 + PAGES_ONLY = 0 + + videos_log = codecs.open('cache/accessible_check_log.txt','w','utf-8') save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document'] @@ -167,14 +187,12 @@ def accessible_check(id=""): # reverse lookup into items array item_id_to_index = {} - # is it used? - items_inorder = ["" + courseinfo['name'] + "\n\n" + pagebreak,] - running_index = 1 modules = fetch('/api/v1/courses/' + str(id) + '/modules',verbose) # headers / module names - items = [] + items = [f"

{courseinfo['name']}

\n{pagebreak}",] + running_index = 1 for x in range(9000): items.append(0) video_link_list = [] @@ -192,7 +210,7 @@ def accessible_check(id=""): if I['type'] == 'SubHeader': #print('subheader: ' + str(I)) - items[running_index] = '

%s

\n' % str(json.dumps(I,indent=2)) + items[running_index] = f"

{I['title']}

\n" if I['type'] == 'Page': item_id_to_index[ I['page_url'] ] = running_index @@ -303,23 +321,22 @@ def accessible_check(id=""): else: t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose) if t2 and 'body' in t2 and t2['body']: - bb = bs(t2['body'],features="lxml") - a_links = bb.find_all('a') + soup_infolder = bs(t2['body'],features="lxml") + soup_in_main = bs(t2['body'],features="lxml") + a_links = soup_infolder.find_all('a') for A in a_links: href = A.get('href') if href and re.search( r'youtu',href): video_link_list.append( (A.get('href'), A.text, 'pages/'+easier_filename + ".html") ) - - page_images = bb.find_all('img') + # Images + page_images = soup_infolder.find_all('img') + page_image_paths = {} for I in page_images: src = I.get('src') if src: d(' - %s' % src) - #if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src): - # d(' * skipping file behind passwords') - #else: try: r = requests.get(src,headers=header, stream=True) mytype = r.headers['content-type'] @@ -327,16 +344,74 @@ def accessible_check(id=""): r_parts = mytype.split("/") ending = r_parts[-1] - with open(pages_f + '/' + str(image_count) + "." 
+ ending, 'wb') as fd: + if ending=='jpeg': ending = "jpg" + + img_full_path = f"{pages_f}/{str(image_count)}.{ending}" + local_src = f"{str(image_count)}.{ending}" + page_image_paths[src] = f"pages/{local_src}" + I['src'] = local_src + + with open(img_full_path, 'wb') as fd: for chunk in r.iter_content(chunk_size=128): fd.write(chunk) image_count += 1 except Exception as e: d( ' * Error downloading page image, %s' % str(e) ) - + + # Repeat for version for main file + page_main_images = soup_in_main.find_all('img') + for I in page_main_images: + src = I.get('src') + if src: + I['src'] = page_image_paths[src] + + + # STUDIO VIDEOS + # Regex pattern to match "custom_arc_media_id%3D" and capture everything + # until the next '&' or end of string + pattern = r"custom_arc_media_id%3D([^&]+)" + found_ids = [] + + replacement_tag = '''''' + + # Iterate over all