Compare commits

...

2 Commits

Author SHA1 Message Date
phowell d66217ec63 flex day helper 2024-07-22 15:20:20 -07:00
phowell 6da470ad1f summer 24 changes 2024-07-22 15:19:31 -07:00
8 changed files with 674 additions and 260 deletions

View File

@ -158,6 +158,7 @@ def accessible_check(id=""):
if not id:
id = input("ID of course to check? ")
verbose = 1
PAGES_ONLY = 1
save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document']
@ -232,36 +233,37 @@ def accessible_check(id=""):
###
### FILES
###
files_f = course_folder + '/files'
headered = 0
print("\nFILES")
try:
os.mkdir(files_f)
except:
print(" * Files folder already exists.")
if not PAGES_ONLY:
files_f = course_folder + '/files'
headered = 0
print("\nFILES")
try:
os.mkdir(files_f)
except:
print(" * Files folder already exists.")
files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
print("LISTING COURSE FILES")
for f in files:
for arg in 'filename,content-type,size,url'.split(','):
if arg=='size':
f['size'] = str(int(f['size']) / 1000) + 'k'
files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
print("LISTING COURSE FILES")
for f in files:
for arg in 'filename,content-type,size,url'.split(','):
if arg=='size':
f['size'] = str(int(f['size']) / 1000) + 'k'
if f['content-type'] in save_file_types:
d(' - %s' % f['filename'])
if f['content-type'] in save_file_types:
d(' - %s' % f['filename'])
if not os.path.exists(files_f + '/' + f['filename']):
r = requests.get(f['url'],headers=header, stream=True)
with open(files_f + '/' + f['filename'], 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
else:
d(" - already downloaded %s" % files_f + '/' + f['filename'])
if not os.path.exists(files_f + '/' + f['filename']):
r = requests.get(f['url'],headers=header, stream=True)
with open(files_f + '/' + f['filename'], 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
else:
d(" - already downloaded %s" % files_f + '/' + f['filename'])
if not headered:
index.append( ('<br /><b>Files</b><br />') )
headered = 1
index.append( ('files/' + f['filename'], f['filename']) )
if not headered:
index.append( ('<br /><b>Files</b><br />') )
headered = 1
index.append( ('files/' + f['filename'], f['filename']) )
###
### PAGES
@ -295,8 +297,9 @@ def accessible_check(id=""):
if os.path.exists(this_page_filename):
d(" - already downloaded %s" % this_page_filename)
this_page_content = codecs.open(this_page_filename,'r','utf-8').read()
elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
d(' * skipping file behind passwords')
#elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
#elif re.search(r'eis-prod',p['url']):
# d(' * skipping file behind passwords')
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
if t2 and 'body' in t2 and t2['body']:
@ -314,22 +317,22 @@ def accessible_check(id=""):
src = I.get('src')
if src:
d(' - %s' % src)
if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src):
d(' * skipping file behind passwords')
else:
try:
r = requests.get(src,headers=header, stream=True)
mytype = r.headers['content-type']
#print("Response is type: " + str(mytype))
r_parts = mytype.split("/")
ending = r_parts[-1]
#if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src):
# d(' * skipping file behind passwords')
#else:
try:
r = requests.get(src,headers=header, stream=True)
mytype = r.headers['content-type']
#print("Response is type: " + str(mytype))
r_parts = mytype.split("/")
ending = r_parts[-1]
with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
image_count += 1
except Exception as e:
d( ' * Error downloading page image, %s' % str(e) )
with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
image_count += 1
except Exception as e:
d( ' * Error downloading page image, %s' % str(e) )
try:
with codecs.open(this_page_filename, 'w','utf-8') as fd:
@ -350,78 +353,80 @@ def accessible_check(id=""):
###
### ASSIGNMENTS
###
headered = 0
asm_f = course_folder + '/assignments'
print("\nASSIGNMENTS")
try:
os.mkdir(asm_f)
except:
d(" - Assignments dir exists")
asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
for p in asm:
d(' - %s' % p['name'])
if not PAGES_ONLY:
headered = 0
asm_f = course_folder + '/assignments'
print("\nASSIGNMENTS")
try:
friendlyfile = to_file_friendly(p['name'])
this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
if os.path.exists(this_assmt_filename):
d(" - already downloaded %s" % this_assmt_filename)
this_assmt_content = open(this_assmt_filename,'r').read()
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
this_assmt_content = "<h2>%s</h2>\n%s\n\n" % (t2['name'], t2['description'])
fd.write(this_assmt_content)
if not headered:
index.append( ('<br /><b>Assignments</b><br />') )
headered = 1
index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
os.mkdir(asm_f)
except:
d(" - Assignments dir exists")
# write to running log of content in order of module
if p['id'] in item_id_to_index:
items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak
except Exception as e:
d(' * Problem %s' % str(e))
asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
for p in asm:
d(' - %s' % p['name'])
###
### FORUMS
###
index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) )
try:
friendlyfile = to_file_friendly(p['name'])
this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
if os.path.exists(this_assmt_filename):
d(" - already downloaded %s" % this_assmt_filename)
this_assmt_content = open(this_assmt_filename,'r').read()
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
this_assmt_content = "<h2>%s</h2>\n%s\n\n" % (t2['name'], t2['description'])
fd.write(this_assmt_content)
if not headered:
index.append( ('<br /><b>Assignments</b><br />') )
headered = 1
index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
"""
# write to running log of content in order of module
if p['id'] in item_id_to_index:
items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak
except Exception as e:
d(' * Problem %s' % str(e))
###
### FORUMS
###
index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) )
"""
###
### QUIZZES
###
###
### QUIZZES
###
# get a list of external urls
headered = 0
t = url + '/api/v1/courses/' + str(id) + '/modules'
while t: t = fetch(t)
mods = results
results = []
for m in mods:
# get a list of external urls
headered = 0
t = url + '/api/v1/courses/' + str(id) + '/modules'
while t: t = fetch(t)
mods = results
results = []
t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
while t2: t2 = fetch(t2)
items = results
for i in items:
#print i
if i['type'] == "ExternalUrl":
for m in mods:
results = []
t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
while t2: t2 = fetch(t2)
items = results
for i in items:
#print i
for j in 'id,title,external_url'.split(','):
print unicode(i[j]), "\t",
print ""
if not headered: index.append( ('<br /><b>External Links</b><br />') )
headered = 1
index.append( (i['external_url'], i['title']) )
"""
if i['type'] == "ExternalUrl":
#print i
for j in 'id,title,external_url'.split(','):
print unicode(i[j]), "\t",
print ""
if not headered: index.append( ('<br /><b>External Links</b><br />') )
headered = 1
index.append( (i['external_url'], i['title']) )
"""

View File

@ -1,9 +1,11 @@
from ast import Try
import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd
from datetime import datetime
import pytz
from dateutil import parser
from datetime import datetime
#from symbol import try_stmt
from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name
from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
from pipelines import sems
@ -971,12 +973,16 @@ def enroll_id_list_to_shell(id_list, shell_id, v=0):
print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e)))
# multiple semesters
def enroll_stem_students_live():
semesters = [183,184]
for S in semesters:
enroll_stem_students_live_semester(S)
def enroll_stem_students_live_semester(the_term, do_removes=0):
import localcache2
the_term = '181' # su23 fa23 = 180
do_removes = 0
depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR".split(" ")
users_to_enroll = users_in_by_depts_live(depts, the_term) # term id
@ -1317,23 +1323,15 @@ def course_search_by_sis():
def course_by_depts_terms(section=0):
"""s = [ x.strip() for x in codecs.open('cache/fa22_eval_sections.csv','r').readlines()]
s = list(funcy.flatten(s))
s.sort()
xyz = input('hit return to continue')
"""
#c = getCoursesInTerm(168,0,1)
#c = getCoursesInTerm(174,0,1) # sp22
#c = getCoursesInTerm(176,0,1) # fa22
get_fresh = 1
SP_TERM = 181
WI_TERM = 182
SEM = "sp24"
get_fresh = 0
#SP_TERM = 181
#WI_TERM = 182
TERM = 183
SEM = "su24"
make_changes = 1
make_changes_LS = 1
do_all = 0
winter_start_day = 2
aviation_start_day = 11
@ -1341,16 +1339,20 @@ def course_by_depts_terms(section=0):
spring_start_day = 29
if get_fresh:
c = getCoursesInTerm(SP_TERM,0,0)
codecs.open(f'cache/courses_in_term_{SP_TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,0,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{SP_TERM}.json','r','utf-8').read() )
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
crn_to_canvasid = {}
for C in c:
#print(C['name'])
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
#print(crn_to_canvasid)
#return
@ -1361,39 +1363,60 @@ def course_by_depts_terms(section=0):
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
if d_start.month > 5:
print("Ignoring ", d_start, " starting too late...")
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
if d_start.month == 1 and d_start.day == aviation_start_day:
print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] )
continue
print(f" - {start} {d_start} - id: {this_id} - {S['code']} {S['crn']} {S['name']}" )
if 1:
if d_start.month < 5 or d_start.month > 7:
print(f" Ignoring {d_start}, starting too far away...")
continue
if d_start.month == 1 and d_start.day == nursing_start_day:
print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] )
continue
#if d_start.month == 1 and d_start.day == aviation_start_day:
# print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] )
# continue
if d_start.month == 1 and d_start.day == winter_start_day:
print("+ winter session: ", d_start, " - ", S['code'])
data = {'course[term_id]':WI_TERM}
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
if make_changes:
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
#print(r3.text)
continue
#if d_start.month == 1 and d_start.day == nursing_start_day:
# print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] )
# continue
if d_start.month == 1 and d_start.day == spring_start_day:
# normal class
continue
if d_start.month == 5 and d_start.day == 28:
print(" Ignoring, term start date" )
continue
else:
print(" Adjust course start day?")
if make_changes:
if do_all != 'a':
do_all = input(' -> adjust? [enter] for yes, [a] to do all remaining. [n] to quit. >')
if do_all == 'n':
exit()
if do_all == '' or do_all == 'a':
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = f"https://gavilan.instructure.com:443/api/v1/courses/{this_id}"
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
"""if d_start.month == 1 and d_start.day == winter_start_day:
print("+ winter session: ", d_start, " - ", S['code'])
data = {'course[term_id]':WI_TERM}
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
if make_changes:
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
#print(r3.text)
continue"""
#if d_start.month == 1 and d_start.day == spring_start_day:
# # normal class
# continue
print("- Late start? ", start, d_start, " - ", S['code'], " ", S['crn'] )
if make_changes_LS:
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
return

flexday.py (new file, 212 lines)
View File

@ -0,0 +1,212 @@
import funcy, codecs, json, sys, csv, re
from pipelines import fetch   # assumption: fetch() used below comes from pipelines, as in the other scripts here
def user_db_sync():
# currently in db
conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
for e in ilrn_emails:
if not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
def user_db_sync2():
#fetch all personnel dir entries from dir_api.php. PERSL unique emails
persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
persl_emails = set([x.lower() for x in funcy.pluck('email',persl)])
#persl_ids = set([x.lower() for x in funcy.pluck('email',persl)])
#
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
#
#fetch all conf_users from dir_api.php CONUSR unique emails
conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all gavi_personnel_ext from dir_api.php. GPEREXT must have the column 'personnel' or 'c_users', or both.
gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
all_emails = set(persl_emails)
all_emails.update(ilrn_emails)
all_emails.update(conusr_emails)
all_emails = list(all_emails)
all_emails.sort()
fout = codecs.open('cache/db_staff_report.csv','w','utf-8')
fout.write('email,personnel_dir,ilearn,conf_user\n')
for e in all_emails:
if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
# goo (minus the G00 prefix), email, and name go into conf_users
fout.write(e+',')
if e in persl_emails:
fout.write('1,')
else:
fout.write('0,')
if e in ilrn_emails:
fout.write('1,')
else:
fout.write('0,')
if e in conusr_emails:
fout.write('1,')
else:
fout.write('0,')
fout.write('\n')
fout.close()
#
#print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) )
print('done')
def get_best_user_record(rec_list):
# rule: the lowest id is used, unless it appears in the exceptions list below
# keys should be replaced with their values; these don't follow the typical lowest-id rule
exceptions = { 120: 883, # Gary Burce
538: 955, # Ronna de benedetti
127: 957, # Mia Cabello
802: 963, # binh vo
1053: 963,
923: 971, # brianna aguilar
933: 970, # elif konus
473: 879, # tania maheu
}
# sort records by id
s_recs = sorted(rec_list, key=sort_id)
preferred = s_recs[0]
# check for exceptions
if int(preferred['id']) in exceptions:
new_preferred_id = exceptions[int(preferred['id'])]
for r in rec_list:
if int(r['id']) == new_preferred_id:
preferred = r
break
return preferred
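# Worked example for get_best_user_record (illustrative, not in the original code):
# for Gary Burce's duplicate rows, ids 120 and 883, the lowest-id rule alone would
# pick 120, but the exceptions map above overrides the preferred record to 883.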
# Get dup rows like this:
# SELECT * FROM conf_users
# WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
# ORDER BY goo;
def correct_dup_user_rows():
'''
Fixing the bad conf_users rows because the intranet1 SSO started changing how it returned the accounts:
- email is either with @gavilan.edu or without, or with @my.gavilan.edu
- but goo is correct
1. change login functions to look up GOO in conf_users
- still add new row if not present
2. Find dups
a. get lowest id (L), that is the correct one
b. for higher id (H), replace H with L in: conf_signups.user, conf_answers.user, conf_hosts.host, conf_logs <- abandoned gavi_logs <-- can't really
3. AND make a big overview page or report for all users/all years so I can check that records are complete
- person
- year or semester (conferences table)
- their signups, hostings
- their 'attended' status and/or comments
'''
fname = 'cache/conf_users_dups.csv'
with open(fname, 'r') as f:
reader = csv.DictReader(f)
data = list(reader)
#print(data)
pairs = funcy.group_by(lambda r: r['goo'], data)
#print(json.dumps(pairs,indent=2))
counter = 0
for goo,recs in pairs.items():
if goo == "0":
continue # skip fake user
counter += 1
#emails = funcy.pluck('email',recs)
#print(list(emails))
#ids = funcy.pluck('id',recs)
#print(list(ids))
s_recs = sorted(recs, key=sort_id)
preferred = get_best_user_record(s_recs)
if 1:
for i,rec in enumerate(s_recs):
col1 = " "
if rec == preferred: col1 = " * "
# print(f"-- {col1} \t {rec['id']} \t {rec['goo']} \t {rec['email']} \t {rec['name']}")
s_recs.remove(preferred)
# Now loop through the non-preferred records, and update tables
for NP in s_recs:
#print(f"I want to remove conf_user id {NP['id']}")
print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
# SELECT * FROM conf_answers where user=1142
# SELECT * FROM conf_hosts where host=1142
#print(f"{s_recs[0]['email']} - lowest id: {s_recs[0]['id']}- {len(s_recs)} records")
#print()
#print(f"Total dups: {counter}")
def sort_id(a):
return int(a['id'])
if __name__ == "__main__":
print ("")
options = { 1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2] ,
2: ['generate sql to fix conf_user dups', correct_dup_user_rows] ,
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

View File

@ -341,6 +341,8 @@ def create_schedule_table_if_not_exists():
# Populate schedule table and correlate to courses table
def courses_to_sched():
# TODO: fix units when they are variable... change to a float within the range, rounded to the nearest 0.5 unit (sketched below).
EXECUTE = 1
seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
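A minimal sketch (not part of this commit) of what that TODO might look like, assuming variable units arrive as range strings like '1-3' or '1/3', the same patterns the schedule SQL later in this diff searches for; normalize_units is a hypothetical helper name.

import re

def normalize_units(units):
    # Midpoint of a variable range ('1-3' or '1/3'), rounded to the nearest
    # 0.5 unit; plain numeric values pass through as floats.
    m = re.match(r'^\s*(\d+(?:\.\d+)?)\s*[-/]\s*(\d+(?:\.\d+)?)\s*$', str(units))
    if m:
        lo, hi = float(m.group(1)), float(m.group(2))
        return round((lo + hi) / 2 * 2) / 2.0
    return float(units)

# e.g. normalize_units('1-3') -> 2.0, normalize_units('1/4') -> 2.5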

View File

@ -30,7 +30,7 @@ from path_dict import PathDict
outputfile = ''
csvwriter = ''
TERM = 181
TERM = 183
def escape_commas(s):
@ -149,40 +149,43 @@ def ilearn_shell_slo_to_csv(shell_slos):
L.append("o%i_assd" % i)
df = pd.DataFrame(columns=L)
for S in shell_slos:
short = S[0]
this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, }
if len(S)>1:
full = S[1]
this_crs['has_outcomes'] = 1
try:
short = S[0]
this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, }
if len(S)>1:
full = S[1]
this_crs['has_outcomes'] = 1
i = 1
i = 1
for o in full['outcomes']:
try:
this_id = int(o['outcome']['id'])
this_crs['o%i_id' % i] = o['outcome']['id']
except Exception as e:
this_crs['o%i_id' % i] = '!'
try:
this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description']
except Exception as e:
this_crs['o%i_desc' % i] = '!'
try:
assessed = 0
if full['full_outcomes'][this_id]['assessed'] == 'True':
assessed = 1
this_crs['o%i_assd' % i] = assessed
except Exception as e:
this_crs['o%i_assd' % i] = '!'
try:
this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid']
except Exception as e:
this_crs['o%i_vendor_guid' % i] = '!'
for o in full['outcomes']:
try:
this_id = int(o['outcome']['id'])
this_crs['o%i_id' % i] = o['outcome']['id']
except Exception as e:
this_crs['o%i_id' % i] = '!'
try:
this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description']
except Exception as e:
this_crs['o%i_desc' % i] = '!'
try:
assessed = 0
if full['full_outcomes'][this_id]['assessed'] == 'True':
assessed = 1
this_crs['o%i_assd' % i] = assessed
except Exception as e:
this_crs['o%i_assd' % i] = '!'
try:
this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid']
except Exception as e:
this_crs['o%i_vendor_guid' % i] = '!'
i += 1
i += 1
df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0])
df = pd.concat( [df, df2], ignore_index = True )
df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0])
df = pd.concat( [df, df2], ignore_index = True )
except Exception as e:
print(f"*** Exception {e} with {S}\n\n")
df.to_csv('cache/outcome.csv')
print(df)

stats.py (160 lines changed)
View File

@ -1393,6 +1393,11 @@ def report_student_stats():
# Save the figure in an HTML file
pio.write_html(fig, 'cache/student_pct_onlinecourse.html')
def test_rpy():
pass
'''
def test_rpy():
from rpy2 import robjects
from rpy2.robjects import Formula, Environment
@ -1439,8 +1444,162 @@ def test_rpy2():
utils = importr('utils')
pi = robjects.r['pi']
print(f"pi={pi[0]}")
'''
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import silhouette_score
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import graphviz
from joblib import dump, load
def cluster_by_mode_1():
# Load the data from a CSV file
data = pd.read_csv('cache/students_bymode.csv')
# Extract the relevant features
features = data[['num_semesters', 'num_units', 'inperson_units', 'hybrid_units', 'online_units']]
# Standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Perform clustering with different numbers of clusters
for n_clusters in range(4, 12):
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(scaled_features)
# Add the cluster labels to the original data
data[f'cluster_{n_clusters}'] = kmeans.labels_
print(f"Clustering with {n_clusters} clusters:")
print(data.groupby(f'cluster_{n_clusters}').size())
print()
# Save the updated data with cluster labels to a new CSV file
data.to_csv('cache/students_bymode_with_clusters_1.csv', index=False)
def cluster_by_mode():
data = pd.read_csv('cache/students_bymode.csv')
# Split features and target
X = data.drop('g_number', axis=1)
y = data['g_number']
# Train decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X, y)
# Visualize decision tree
dot_data = export_graphviz(clf, out_file=None,
feature_names=X.columns,
class_names=y.unique(),
filled=True, rounded=True,
special_characters=True)
graph = graphviz.Source(dot_data)
graph.render('decision_tree', view=True)
data.to_csv('cache/students_bymode_with_dt.csv', index=False)
def cluster_by_mode_2():
# Load the data from a CSV file
data = pd.read_csv('cache/students_bymode.csv')
# Extract the features (excluding the 'g_number' column)
features = data.drop('g_number', axis=1)
# Scale the features to have zero mean and unit variance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Determine the ideal number of clusters using the elbow method
inertias = []
for k in range(4, 40): # Try different values of k (here 4 through 39)
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(scaled_features)
inertias.append(kmeans.inertia_)
# Plot the elbow curve
import matplotlib.pyplot as plt
plt.plot(range(4, 40), inertias, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()
# Choose the ideal number of clusters based on the elbow curve
ideal_k = 12 # Adjust this based on your observation
# Perform clustering with the ideal number of clusters
kmeans = KMeans(n_clusters=ideal_k, random_state=42)
kmeans.fit(scaled_features)
# Get the cluster labels for each data point
labels = kmeans.labels_
# Add the cluster labels to the original data
data['Cluster'] = labels
# Save the cluster labels to a new CSV file
data.to_csv('cache/students_bymode_with_clusters_2.csv', index=False)
# Get the cluster centers (centroids)
centroids = scaler.inverse_transform(kmeans.cluster_centers_)
# Print the cluster centers
for i, centroid in enumerate(centroids):
print(f"Cluster {i} center:")
for feature, value in zip(features.columns, centroid):
print(f"{feature}: {value}")
print()
# Save the trained objects to files
dump(kmeans, 'kmeans.joblib')
dump(scaler, 'scaler.joblib')
# Load the saved objects for future use
loaded_kmeans = load('kmeans.joblib')
loaded_scaler = load('scaler.joblib')
# Use the loaded objects for predictions (note: new_data is not defined in this
# function; see cluster_with_new_data below for that step)
#new_data_scaled = loaded_scaler.transform(new_data)
#predictions = loaded_kmeans.predict(new_data_scaled)
def cluster_with_new_data():
## NOT TESTED
# reload the kmeans and scaler objects saved by the previous step
kmeans = load('kmeans.joblib')
scaler = load('scaler.joblib')
# Load the new data
new_data = pd.read_csv('new_data.csv')
# Extract the features from the new data
new_features = new_data.drop('g_number', axis=1)
# Scale the new features using the fitted scaler
scaled_new_features = scaler.transform(new_features)
# Predict the cluster labels for the new data
new_labels = kmeans.predict(scaled_new_features)
# Add the cluster labels to the new data
new_data['Cluster'] = new_labels
if __name__ == "__main__":
options = { 1: ['get all historical grades from ilearn',get_all] ,
2: ['process grades csv file',process_grades] ,
@ -1462,6 +1621,7 @@ if __name__ == "__main__":
30: ['visualize course modes multi semester', visualize_course_modes_multi_semester],
31: ['Report on student stats', report_student_stats],
32: ['test rpy', test_rpy],
33: ['cluster students by mode', cluster_by_mode],
}
print ('')

View File

@ -66,6 +66,14 @@ where (s.type='online' or s.type='hybrid' or s.type='online line')
and not cc.path='sstaff@gavilan.edu'
order by u.sortable_name;
-- names that are uppercase
SELECT *
FROM canvas.users
WHERE REGEXP_LIKE(name, '^[A-Z]+[[:space:]]')
order by last_logged_out ;
-- for outlook
select string_agg(distinct LOWER(cc.path), '; ') from canvas.courses c
@ -112,7 +120,12 @@ group by u.sortable_name
order by total desc, online desc, onlinelive desc, hybrid desc;
-- find (fix?) rows where units are variable
SELECT * FROM canvas.schedule
WHERE units LIKE '%-%';
SELECT * FROM canvas.schedule
WHERE units LIKE '%/%';
-- num units
select u.sortable_name, p.sis_user_id,
@ -145,6 +158,76 @@ order by total desc;
-- students.csv: each student, num_semesters, num_units, num_f2f, num_online, num_hybrid
SELECT
p.sis_user_id as g_number,
COUNT(DISTINCT s.sem) AS num_semesters,
SUM(s.units::FLOAT) AS num_units,
sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson_units,
sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid_units,
sum(CASE WHEN s.type = 'online' or s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS online_units
FROM
canvas.users u
JOIN canvas.enrollments e ON u.id = e.user_id
JOIN canvas.courses c ON e.course_id = c.id
JOIN canvas.schedule s ON c.id = s.canvascourse
JOIN canvas.pseudonyms p ON u.id = p.user_id
WHERE
e.workflow_state = 'active'
AND e.type = 'StudentEnrollment'
AND u.id IN (
SELECT u.id FROM canvas.enrollments AS e
JOIN canvas.users AS u ON e.user_id=u.id
JOIN canvas.courses AS c ON e.course_id=c.id
WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%')
AND e.workflow_state='active'
AND e.type='StudentEnrollment'
GROUP BY u.id
)
GROUP BY
p.sis_user_id
ORDER BY
num_semesters, p.sis_user_id;
-- students.csv: each student, num_semesters, num_units
SELECT
p.sis_user_id as g_number,
COUNT(DISTINCT s.sem) AS num_semesters,
SUM(s.units::FLOAT) AS num_units
FROM
canvas.users u
JOIN canvas.enrollments e ON u.id = e.user_id
JOIN canvas.courses c ON e.course_id = c.id
JOIN canvas.schedule s ON c.id = s.canvascourse
JOIN canvas.pseudonyms p ON u.id = p.user_id
WHERE
e.workflow_state = 'active'
AND e.type = 'StudentEnrollment'
AND u.id IN (
SELECT u.id FROM canvas.enrollments AS e
JOIN canvas.users AS u ON e.user_id=u.id
JOIN canvas.courses AS c ON e.course_id=c.id
WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%')
AND e.workflow_state='active'
AND e.type='StudentEnrollment'
GROUP BY u.id
)
GROUP BY
p.sis_user_id
ORDER BY
num_semesters, p.sis_user_id;
-- each class
select u.sortable_name, c.course_code, s.type, s.units::FLOAT

View File

@ -1796,8 +1796,8 @@ def track_user(id=0,qid=0):
url_addition = ""
if 1: # hard code dates
start_date = "2023-08-01T00:00:00-07:00"
end_date = "2024-01-01T00:00:00-07:00"
start_date = "2024-01-01T00:00:00-07:00"
end_date = "2024-07-01T00:00:00-07:00"
url_addition = f"?start_time={start_date}&end_time={end_date}"
elif 'last_days_log' in info:
print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id']))
@ -2041,80 +2041,6 @@ def find_new_teachers():
for J in jj:
print( J['teacher'])
def user_db_sync():
# currently in db
conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
for e in ilrn_emails:
if not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
def user_db_sync2():
#fetch all personnel dir entries from dir_api.php. PERSL unique emails
persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
persl_emails = set([x.lower() for x in funcy.pluck('email',persl)])
#persl_ids = set([x.lower() for x in funcy.pluck('email',persl)])
#
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
#
#fetch all conf_users from dir_api.php CONUSR unique emails
conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all gavi_personnel_ext from dir_api.php GPEREXT must have column 'personnel' or 'c_users' or both.
gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
all_emails = set(persl_emails)
all_emails.update(ilrn_emails)
all_emails.update(conusr_emails)
all_emails = list(all_emails)
all_emails.sort()
fout = codecs.open('cache/db_staff_report.csv','w','utf-8')
fout.write('email,personnel_dir,ilearn,conf_user\n')
for e in all_emails:
if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
# goo (minus G00) email, and name go into conf_users
fout.write(e+',')
if e in persl_emails:
fout.write('1,')
else:
fout.write('0,')
if e in ilrn_emails:
fout.write('1,')
else:
fout.write('0,')
if e in conusr_emails:
fout.write('1,')
else:
fout.write('0,')
fout.write('\n')
fout.close()
#
#print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) )
print('done')
import traceback