diff --git a/content.py b/content.py index dceb58b..533b621 100644 --- a/content.py +++ b/content.py @@ -158,6 +158,7 @@ def accessible_check(id=""): if not id: id = input("ID of course to check? ") verbose = 1 + PAGES_ONLY = 1 save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document'] @@ -232,36 +233,37 @@ def accessible_check(id=""): ### ### FILES ### - files_f = course_folder + '/files' - headered = 0 - print("\nFILES") - try: - os.mkdir(files_f) - except: - print(" * Files folder already exists.") + if not PAGES_ONLY: + files_f = course_folder + '/files' + headered = 0 + print("\nFILES") + try: + os.mkdir(files_f) + except: + print(" * Files folder already exists.") - files = fetch('/api/v1/courses/' + str(id) + '/files', verbose) - print("LISTING COURSE FILES") - for f in files: - for arg in 'filename,content-type,size,url'.split(','): - if arg=='size': - f['size'] = str(int(f['size']) / 1000) + 'k' + files = fetch('/api/v1/courses/' + str(id) + '/files', verbose) + print("LISTING COURSE FILES") + for f in files: + for arg in 'filename,content-type,size,url'.split(','): + if arg=='size': + f['size'] = str(int(f['size']) / 1000) + 'k' - if f['content-type'] in save_file_types: - d(' - %s' % f['filename']) + if f['content-type'] in save_file_types: + d(' - %s' % f['filename']) - if not os.path.exists(files_f + '/' + f['filename']): - r = requests.get(f['url'],headers=header, stream=True) - with open(files_f + '/' + f['filename'], 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - else: - d(" - already downloaded %s" % files_f + '/' + f['filename']) + if not os.path.exists(files_f + '/' + f['filename']): + r = requests.get(f['url'],headers=header, stream=True) + with open(files_f + '/' + f['filename'], 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + else: + d(" - already downloaded %s" % files_f + '/' + f['filename']) - if not headered: - index.append( ('
<h2>Files</h2>
') ) - headered = 1 - index.append( ('files/' + f['filename'], f['filename']) ) + if not headered: + index.append( ('
<h2>Files</h2>
') ) + headered = 1 + index.append( ('files/' + f['filename'], f['filename']) ) ### ### PAGES @@ -295,8 +297,9 @@ def accessible_check(id=""): if os.path.exists(this_page_filename): d(" - already downloaded %s" % this_page_filename) this_page_content = codecs.open(this_page_filename,'r','utf-8').read() - elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']): - d(' * skipping file behind passwords') + #elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']): + #elif re.search(r'eis-prod',p['url']): + # d(' * skipping file behind passwords') else: t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose) if t2 and 'body' in t2 and t2['body']: @@ -314,22 +317,22 @@ def accessible_check(id=""): src = I.get('src') if src: d(' - %s' % src) - if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src): - d(' * skipping file behind passwords') - else: - try: - r = requests.get(src,headers=header, stream=True) - mytype = r.headers['content-type'] - #print("Response is type: " + str(mytype)) - r_parts = mytype.split("/") - ending = r_parts[-1] + #if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src): + # d(' * skipping file behind passwords') + #else: + try: + r = requests.get(src,headers=header, stream=True) + mytype = r.headers['content-type'] + #print("Response is type: " + str(mytype)) + r_parts = mytype.split("/") + ending = r_parts[-1] - with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - image_count += 1 - except Exception as e: - d( ' * Error downloading page image, %s' % str(e) ) + with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + image_count += 1 + except Exception as e: + d( ' * Error downloading page image, %s' % str(e) ) try: with codecs.open(this_page_filename, 'w','utf-8') as fd: @@ -350,78 +353,80 @@ def accessible_check(id=""): ### ### ASSIGNMENTS ### - headered = 0 - asm_f = course_folder + '/assignments' - print("\nASSIGNMENTS") - try: - os.mkdir(asm_f) - except: - d(" - Assignments dir exists") - - asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose) - for p in asm: - d(' - %s' % p['name']) - - + + if not PAGES_ONLY: + headered = 0 + asm_f = course_folder + '/assignments' + print("\nASSIGNMENTS") try: - friendlyfile = to_file_friendly(p['name']) - this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html' - if os.path.exists(this_assmt_filename): - d(" - already downloaded %s" % this_assmt_filename) - this_assmt_content = open(this_assmt_filename,'r').read() - else: - t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose) - with codecs.open(this_assmt_filename, 'w','utf-8') as fd: - this_assmt_content = "

<h2>%s</h2>

\n%s\n\n" % (t2['name'], t2['description']) - fd.write(this_assmt_content) - if not headered: - index.append( ('
<h2>Assignments</h2>
') ) - headered = 1 - index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) ) + os.mkdir(asm_f) + except: + d(" - Assignments dir exists") + + asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose) + for p in asm: + d(' - %s' % p['name']) + + + try: + friendlyfile = to_file_friendly(p['name']) + this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html' + if os.path.exists(this_assmt_filename): + d(" - already downloaded %s" % this_assmt_filename) + this_assmt_content = open(this_assmt_filename,'r').read() + else: + t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose) + with codecs.open(this_assmt_filename, 'w','utf-8') as fd: + this_assmt_content = "

<h2>%s</h2>

\n%s\n\n" % (t2['name'], t2['description']) + fd.write(this_assmt_content) + if not headered: + index.append( ('
<h2>Assignments</h2>
') ) + headered = 1 + index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) ) - # write to running log of content in order of module - if p['id'] in item_id_to_index: - items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak - except Exception as e: - d(' * Problem %s' % str(e)) + # write to running log of content in order of module + if p['id'] in item_id_to_index: + items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak + except Exception as e: + d(' * Problem %s' % str(e)) - ### - ### FORUMS - ### + ### + ### FORUMS + ### - index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) ) + index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) ) - """ + """ - ### - ### QUIZZES - ### + ### + ### QUIZZES + ### - # get a list external urls - headered = 0 - t = url + '/api/v1/courses/' + str(id) + '/modules' - while t: t = fetch(t) - mods = results - results = [] - for m in mods: + # get a list external urls + headered = 0 + t = url + '/api/v1/courses/' + str(id) + '/modules' + while t: t = fetch(t) + mods = results results = [] - t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items' - while t2: t2 = fetch(t2) - items = results - for i in items: - #print i - if i['type'] == "ExternalUrl": + for m in mods: + results = [] + t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items' + while t2: t2 = fetch(t2) + items = results + for i in items: #print i - for j in 'id,title,external_url'.split(','): - print unicode(i[j]), "\t", - print "" - if not headered: index.append( ('
<h2>External Links</h2>
') ) - headered = 1 - index.append( (i['external_url'], i['title']) ) - """ + if i['type'] == "ExternalUrl": + #print i + for j in 'id,title,external_url'.split(','): + print unicode(i[j]), "\t", + print "" + if not headered: index.append( ('
<h2>External Links</h2>
') ) + headered = 1 + index.append( (i['external_url'], i['title']) ) + """ diff --git a/courses.py b/courses.py index 6bce300..4bd18dd 100644 --- a/courses.py +++ b/courses.py @@ -1,9 +1,11 @@ +from ast import Try import json, re, requests, codecs, sys, time, funcy, os import pandas as pd from datetime import datetime import pytz from dateutil import parser from datetime import datetime +#from symbol import try_stmt from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem from pipelines import sems @@ -971,12 +973,16 @@ def enroll_id_list_to_shell(id_list, shell_id, v=0): print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e))) - - +# multiple semesters def enroll_stem_students_live(): + semesters = [183,184] + + for S in semesters: + enroll_stem_students_live_semester(S) + + +def enroll_stem_students_live_semester(the_term, do_removes=0): import localcache2 - the_term = '181' # su23 fa23 = 180 - do_removes = 0 depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR".split(" ") users_to_enroll = users_in_by_depts_live(depts, the_term) # term id @@ -1317,23 +1323,15 @@ def course_search_by_sis(): def course_by_depts_terms(section=0): - """s = [ x.strip() for x in codecs.open('cache/fa22_eval_sections.csv','r').readlines()] - s = list(funcy.flatten(s)) - s.sort() - xyz = input('hit return to continue') - """ - #c = getCoursesInTerm(168,0,1) - #c = getCoursesInTerm(174,0,1) # sp22 - #c = getCoursesInTerm(176,0,1) # fa22 - - get_fresh = 1 - SP_TERM = 181 - WI_TERM = 182 - SEM = "sp24" + get_fresh = 0 + #SP_TERM = 181 + #WI_TERM = 182 + TERM = 183 + SEM = "su24" make_changes = 1 - make_changes_LS = 1 + do_all = 0 winter_start_day = 2 aviation_start_day = 11 @@ -1341,16 +1339,20 @@ def course_by_depts_terms(section=0): spring_start_day = 29 if get_fresh: - c = getCoursesInTerm(SP_TERM,0,0) - codecs.open(f'cache/courses_in_term_{SP_TERM}.json','w','utf-8').write(json.dumps(c,indent=2)) + print(f"Getting list of courses in {SEM}") + c = getCoursesInTerm(TERM,0,0) + codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2)) else: - c = json.loads( codecs.open(f'cache/courses_in_term_{SP_TERM}.json','r','utf-8').read() ) + c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() ) crn_to_canvasid = {} for C in c: - #print(C['name']) if 'sis_course_id' in C and C['sis_course_id']: + print( f"{C['name']} -> {C['sis_course_id'][7:13]}" ) crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id']) + else: + print( f"---NO CRN IN: {C['name']} -> {C}" ) + #print(crn_to_canvasid) #return @@ -1361,39 +1363,60 @@ def course_by_depts_terms(section=0): start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4] d_start = datetime.strptime(start,"%m/%d/%Y") - if d_start.month > 5: - print("Ignoring ", d_start, " starting too late...") + try: + this_id = crn_to_canvasid[S['crn']] + except Exception as e: + print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" ) continue - if d_start.month == 1 and d_start.day == aviation_start_day: - print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] ) - continue + print(f" - {start} {d_start} - id: {this_id} - {S['code']} {S['crn']} {S['name']}" ) + if 1: + if d_start.month < 5 or d_start.month > 7: + print(f" Ignoring {d_start}, starting too far away...") + continue + + #if d_start.month == 1 and d_start.day == 
aviation_start_day: + # print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] ) + # continue - if d_start.month == 1 and d_start.day == nursing_start_day: - print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] ) - continue + #if d_start.month == 1 and d_start.day == nursing_start_day: + # print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] ) + # continue + + if d_start.month == 5 and d_start.day == 28: + print(" Ignoring, term start date" ) + continue - if d_start.month == 1 and d_start.day == winter_start_day: - print("+ winter session: ", d_start, " - ", S['code']) - data = {'course[term_id]':WI_TERM} - u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']] - if make_changes: - r3 = requests.put(u2, headers=header, params=data) - print(" updated.. OK") - #print(r3.text) - continue + else: + print(" Adjust course start day?") + + if make_changes: + if do_all != 'a': + do_all = input(' -> adjust? [enter] for yes, [a] to do all remaining. [n] to quit. >') + if do_all == 'n': + exit() + if do_all == '' or do_all == 'a': + data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True, + 'course[restrict_enrollments_to_course_dates]':True } + u2 = f"https://gavilan.instructure.com:443/api/v1/courses/{this_id}" + r3 = requests.put(u2, headers=header, params=data) + print(" updated.. OK") + + + """if d_start.month == 1 and d_start.day == winter_start_day: + print("+ winter session: ", d_start, " - ", S['code']) + data = {'course[term_id]':WI_TERM} + u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']] + if make_changes: + r3 = requests.put(u2, headers=header, params=data) + print(" updated.. OK") + #print(r3.text) + continue""" - if d_start.month == 1 and d_start.day == spring_start_day: - # normal class - continue + #if d_start.month == 1 and d_start.day == spring_start_day: + # # normal class + # continue - print("- Late start? ", start, d_start, " - ", S['code'], " ", S['crn'] ) - if make_changes_LS: - data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True, - 'course[restrict_enrollments_to_course_dates]':True } - u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']] - r3 = requests.put(u2, headers=header, params=data) - print(" updated.. OK") return diff --git a/localcache2.py b/localcache2.py index b726df7..202196a 100644 --- a/localcache2.py +++ b/localcache2.py @@ -341,6 +341,8 @@ def create_schedule_table_if_not_exists(): # Populate schedule table and correlate to courses table def courses_to_sched(): + # TODO: fix units when they are variable... change to float in between range. round to 0.5 unit. 
+ EXECUTE = 1 seasons = {'10':'wi','30':'sp','50':'su','70':'fa'} diff --git a/outcomes2022.py b/outcomes2022.py index 30c8c8c..2c9af27 100644 --- a/outcomes2022.py +++ b/outcomes2022.py @@ -30,7 +30,7 @@ from path_dict import PathDict outputfile = '' csvwriter = '' -TERM = 181 +TERM = 183 def escape_commas(s): @@ -149,40 +149,43 @@ def ilearn_shell_slo_to_csv(shell_slos): L.append("o%i_assd" % i) df = pd.DataFrame(columns=L) for S in shell_slos: - short = S[0] - this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, } - if len(S)>1: - full = S[1] - this_crs['has_outcomes'] = 1 + try: + short = S[0] + this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, } + if len(S)>1: + full = S[1] + this_crs['has_outcomes'] = 1 - i = 1 + i = 1 - for o in full['outcomes']: - try: - this_id = int(o['outcome']['id']) - this_crs['o%i_id' % i] = o['outcome']['id'] - except Exception as e: - this_crs['o%i_id' % i] = '!' - try: - this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description'] - except Exception as e: - this_crs['o%i_desc' % i] = '!' - try: - assessed = 0 - if full['full_outcomes'][this_id]['assessed'] == 'True': - assessed = 1 - this_crs['o%i_assd' % i] = assessed - except Exception as e: - this_crs['o%i_assd' % i] = '!' - try: - this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid'] - except Exception as e: - this_crs['o%i_vendor_guid' % i] = '!' + for o in full['outcomes']: + try: + this_id = int(o['outcome']['id']) + this_crs['o%i_id' % i] = o['outcome']['id'] + except Exception as e: + this_crs['o%i_id' % i] = '!' + try: + this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description'] + except Exception as e: + this_crs['o%i_desc' % i] = '!' + try: + assessed = 0 + if full['full_outcomes'][this_id]['assessed'] == 'True': + assessed = 1 + this_crs['o%i_assd' % i] = assessed + except Exception as e: + this_crs['o%i_assd' % i] = '!' + try: + this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid'] + except Exception as e: + this_crs['o%i_vendor_guid' % i] = '!' 
- i += 1 - - df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0]) - df = pd.concat( [df, df2], ignore_index = True ) + i += 1 + df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0]) + df = pd.concat( [df, df2], ignore_index = True ) + + except Exception as e: + print(f"*** Exception {e} with {S}\n\n") df.to_csv('cache/outcome.csv') print(df) diff --git a/stats.py b/stats.py index 3486fea..24bb592 100644 --- a/stats.py +++ b/stats.py @@ -1393,6 +1393,11 @@ def report_student_stats(): # Save the figure in an HTML file pio.write_html(fig, 'cache/student_pct_onlinecourse.html') + +def test_rpy(): + pass + +''' def test_rpy(): from rpy2 import robjects from rpy2.robjects import Formula, Environment @@ -1439,8 +1444,162 @@ def test_rpy2(): utils = importr('utils') pi = robjects.r['pi'] print(f"pi={pi[0]}") +''' + + +import pandas as pd +from sklearn.cluster import KMeans +from sklearn.preprocessing import StandardScaler +from sklearn.tree import DecisionTreeClassifier +from sklearn.metrics import silhouette_score +from sklearn.tree import DecisionTreeClassifier, export_graphviz +import graphviz + +from joblib import dump, load + + +def cluster_by_mode_1(): + + # Load the data from a CSV file + data = pd.read_csv('cache/students_bymode.csv') + + # Extract the relevant features + features = data[['num_semesters', 'num_units', 'inperson_units', 'hybrid_units', 'online_units']] + + # Standardize the features + scaler = StandardScaler() + scaled_features = scaler.fit_transform(features) + + # Perform clustering with different numbers of clusters + for n_clusters in range(4, 12): + kmeans = KMeans(n_clusters=n_clusters, random_state=42) + kmeans.fit(scaled_features) + + # Add the cluster labels to the original data + data[f'cluster_{n_clusters}'] = kmeans.labels_ + + print(f"Clustering with {n_clusters} clusters:") + print(data.groupby(f'cluster_{n_clusters}').size()) + print() + + # Save the updated data with cluster labels to a new CSV file + data.to_csv('cache/students_bymode_with_clusters_1.csv', index=False) + + + + +def cluster_by_mode(): + data = pd.read_csv('cache/students_bymode.csv') + + # Split features and target + X = data.drop('g_number', axis=1) + y = data['g_number'] + + # Train decision tree classifier + clf = DecisionTreeClassifier() + clf.fit(X, y) + + # Visualize decision tree + dot_data = export_graphviz(clf, out_file=None, + feature_names=X.columns, + class_names=y.unique(), + filled=True, rounded=True, + special_characters=True) + graph = graphviz.Source(dot_data) + graph.render('decision_tree', view=True) + data.to_csv('cache/students_bymode_with_dt.csv', index=False) + + +def cluster_by_mode_2(): + + # Load the data from a CSV file + data = pd.read_csv('cache/students_bymode.csv') + + # Extract the features (excluding the 'g_number' column) + features = data.drop('g_number', axis=1) + + # Scale the features to have zero mean and unit variance + scaler = StandardScaler() + scaled_features = scaler.fit_transform(features) + + # Determine the ideal number of clusters using the elbow method + inertias = [] + for k in range(4, 40): # Try different values of k (e.g., 1 to 10) + kmeans = KMeans(n_clusters=k, random_state=42) + kmeans.fit(scaled_features) + inertias.append(kmeans.inertia_) + + # Plot the elbow curve + import matplotlib.pyplot as plt + plt.plot(range(4, 40), inertias, marker='o') + plt.xlabel('Number of Clusters (k)') + plt.ylabel('Inertia') + plt.title('Elbow Method') + plt.show() + + # Choose the ideal number of clusters based on the elbow curve + 
ideal_k = 12 # Adjust this based on your observation + + # Perform clustering with the ideal number of clusters + kmeans = KMeans(n_clusters=ideal_k, random_state=42) + kmeans.fit(scaled_features) + + + + # Get the cluster labels for each data point + labels = kmeans.labels_ + + # Add the cluster labels to the original data + data['Cluster'] = labels + + # Save the cluster labels to a new CSV file + data.to_csv('cache/students_bymode_with_clusters_2.csv', index=False) + + # Get the cluster centers (centroids) + centroids = scaler.inverse_transform(kmeans.cluster_centers_) + + # Print the cluster centers + for i, centroid in enumerate(centroids): + print(f"Cluster {i} center:") + for feature, value in zip(features.columns, centroid): + print(f"{feature}: {value}") + print() + + + # Save the trained objects to files + dump(kmeans, 'kmeans.joblib') + dump(scaler, 'scaler.joblib') + + # Load the saved objects for future use + loaded_kmeans = load('kmeans.joblib') + loaded_scaler = load('scaler.joblib') + + # Use the loaded objects for predictions + new_data_scaled = loaded_scaler.transform(new_data) + predictions = loaded_kmeans.predict(new_data_scaled) + + +def cluster_with_new_data(): + ## NOT TESTED + # need to save the kmeans and scaler objects from previous step. + + # Load the new data + new_data = pd.read_csv('new_data.csv') + + # Extract the features from the new data + new_features = new_data.drop('g_number', axis=1) + + # Scale the new features using the fitted scaler + scaled_new_features = scaler.transform(new_features) + + # Predict the cluster labels for the new data + new_labels = kmeans.predict(scaled_new_features) + + # Add the cluster labels to the new data + new_data['Cluster'] = new_labels + if __name__ == "__main__": options = { 1: ['get all historical grades from ilearn',get_all] , 2: ['process grades csv file',process_grades] , @@ -1462,6 +1621,7 @@ if __name__ == "__main__": 30: ['visualize course modes multi semester', visualize_course_modes_multi_semester], 31: ['Report on student stats', report_student_stats], 32: ['test rpy', test_rpy], + 33: ['cluster students by mode', cluster_by_mode], } print ('') diff --git a/useful queries.sql b/useful queries.sql index 70e72b9..e9682b6 100644 --- a/useful queries.sql +++ b/useful queries.sql @@ -66,6 +66,14 @@ where (s.type='online' or s.type='hybrid' or s.type='online line') and not cc.path='sstaff@gavilan.edu' order by u.sortable_name; +-- names that are uppercase +SELECT * +FROM canvas.users +WHERE REGEXP_LIKE(name, '^[A-Z]+[[:space:]]') +order by last_logged_out ; + + + -- for outlook select string_agg(distinct LOWER(cc.path), '; ') from canvas.courses c @@ -112,7 +120,12 @@ group by u.sortable_name order by total desc, online desc, onlinelive desc, hybrid desc; +-- find (fix?) 
rows where units are variable +SELECT * FROM canvas.schedule +WHERE units LIKE '%-%'; +SELECT * FROM canvas.schedule +WHERE units LIKE '%/%'; -- num units select u.sortable_name, p.sis_user_id, @@ -145,6 +158,76 @@ order by total desc; +-- students.csv: each student, num_semesters, num_units, num_f2f, num_online, num_hybrid + +SELECT + p.sis_user_id as g_number, + COUNT(DISTINCT s.sem) AS num_semesters, + SUM(s.units::FLOAT) AS num_units, + sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson_units, + sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid_units, + sum(CASE WHEN s.type = 'online' or s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS online_units +FROM + canvas.users u + JOIN canvas.enrollments e ON u.id = e.user_id + JOIN canvas.courses c ON e.course_id = c.id + JOIN canvas.schedule s ON c.id = s.canvascourse + JOIN canvas.pseudonyms p ON u.id = p.user_id +WHERE + e.workflow_state = 'active' + AND e.type = 'StudentEnrollment' + AND u.id IN ( + SELECT u.id FROM canvas.enrollments AS e + JOIN canvas.users AS u ON e.user_id=u.id + JOIN canvas.courses AS c ON e.course_id=c.id + WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%') + AND e.workflow_state='active' + AND e.type='StudentEnrollment' + GROUP BY u.id + ) +GROUP BY + p.sis_user_id +ORDER BY + num_semesters, p.sis_user_id; + + + + + + + + +-- students.csv: each student, num_semesters, num_units + +SELECT + p.sis_user_id as g_number, + COUNT(DISTINCT s.sem) AS num_semesters, + SUM(s.units::FLOAT) AS num_units +FROM + canvas.users u + JOIN canvas.enrollments e ON u.id = e.user_id + JOIN canvas.courses c ON e.course_id = c.id + JOIN canvas.schedule s ON c.id = s.canvascourse + JOIN canvas.pseudonyms p ON u.id = p.user_id +WHERE + e.workflow_state = 'active' + AND e.type = 'StudentEnrollment' + AND u.id IN ( + SELECT u.id FROM canvas.enrollments AS e + JOIN canvas.users AS u ON e.user_id=u.id + JOIN canvas.courses AS c ON e.course_id=c.id + WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%') + AND e.workflow_state='active' + AND e.type='StudentEnrollment' + GROUP BY u.id + ) +GROUP BY + p.sis_user_id +ORDER BY + num_semesters, p.sis_user_id; + + + -- each class select u.sortable_name, c.course_code, s.type, s.units::FLOAT diff --git a/users.py b/users.py index c076c4b..c192135 100644 --- a/users.py +++ b/users.py @@ -1796,8 +1796,8 @@ def track_user(id=0,qid=0): url_addition = "" if 1: # hard code dates - start_date = "2023-08-01T00:00:00-07:00" - end_date = "2024-01-01T00:00:00-07:00" + start_date = "2024-01-01T00:00:00-07:00" + end_date = "2024-07-01T00:00:00-07:00" url_addition = f"?start_time={start_date}&end_time={end_date}" elif 'last_days_log' in info: print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id'])) @@ -2041,80 +2041,6 @@ def find_new_teachers(): for J in jj: print( J['teacher']) -def user_db_sync(): - # currently in db - conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1") - conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)]) - - #fetch all staff from ilearn ILRN unique emails - ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read()) - ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)]) - - for e in ilrn_emails: - - if not (e in conusr_emails) and e.endswith('@gavilan.edu'): - E = funcy.first(funcy.where(ilrn,email=e)) - goo = E['login_id'][3:] - #print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], 
E['login_id']) ) - print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) ) - - - -def user_db_sync2(): - #fetch all personnel dir entries from dir_api.php. PERSL unique emails - persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1") - persl_emails = set([x.lower() for x in funcy.pluck('email',persl)]) - #persl_ids = set([x.lower() for x in funcy.pluck('email',persl)]) - # - #fetch all staff from ilearn ILRN unique emails - ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read()) - ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)]) - # - #fetch all conf_users from dir_api.php CONUSR unique emails - conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1") - conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)]) - - #fetch all gavi_personnel_ext from dir_api.php GPEREXT must have column 'personnel' or 'c_users' or both. - gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1") - - all_emails = set(persl_emails) - all_emails.update(ilrn_emails) - all_emails.update(conusr_emails) - - all_emails = list(all_emails) - all_emails.sort() - - fout = codecs.open('cache/db_staff_report.csv','w','utf-8') - fout.write('email,personnel_dir,ilearn,conf_user\n') - for e in all_emails: - - if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'): - E = funcy.first(funcy.where(ilrn,email=e)) - goo = E['login_id'][3:] - #print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) ) - print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) ) - - # goo (minus G00) email, and name go into conf_users - - fout.write(e+',') - if e in persl_emails: - fout.write('1,') - else: - fout.write('0,') - if e in ilrn_emails: - fout.write('1,') - else: - fout.write('0,') - if e in conusr_emails: - fout.write('1,') - else: - fout.write('0,') - fout.write('\n') - fout.close() - # - - #print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) ) - print('done') import traceback
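
# Minimal sketch (not part of the patch above): cluster_with_new_data() in stats.py is
# marked "NOT TESTED" and refers to `kmeans` and `scaler` without loading them. One way
# to make it self-contained is to reload the objects that cluster_by_mode_2() dumps with
# joblib ('kmeans.joblib', 'scaler.joblib') before predicting. The input path
# 'cache/new_students_bymode.csv' is an assumption for illustration; the column layout
# ('g_number' plus the semester/unit/mode features) follows cache/students_bymode.csv.
import pandas as pd
from joblib import load

def cluster_new_students(csv_path='cache/new_students_bymode.csv'):
    # Reload the fitted objects saved by cluster_by_mode_2()
    kmeans = load('kmeans.joblib')
    scaler = load('scaler.joblib')

    # New data must carry the same feature columns the scaler was fit on
    new_data = pd.read_csv(csv_path)
    new_features = new_data.drop('g_number', axis=1)

    # Scale with the already-fitted scaler, then assign cluster labels
    scaled = scaler.transform(new_features)
    new_data['Cluster'] = kmeans.predict(scaled)

    new_data.to_csv('cache/new_students_bymode_with_clusters.csv', index=False)
    return new_data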