sandboxes; collating student histories

Coding with Peter 2023-06-20 07:02:25 -07:00
parent dc512850f3
commit b4dd9fdb2f
4 changed files with 612 additions and 11 deletions

View File

@@ -1283,30 +1283,76 @@ def xlist_cwe():
def modify_courses():
if 1:
# enroll teacher
c = '17987'
usrid = '1'
try:
u3 = url + "/api/v1/courses/%s/enrollments" % c
data2 = { "enrollment[type]":"TeacherEnrollment", "enrollment[user_id]":usrid,
"enrollment[enrollment_state]":"active" }
r4 = requests.post(u3, headers=header, params=data2)
print(json.dumps(json.loads(r4.text),indent=2))
print()
except Exception as e:
print('****%s' % str(e))
if 0:
# publish and make available to auth users
for c in [18038, 18039, 18040, 18041, 18042, 18043, 18044]:
try:
#print(R)
print(f'*Doing course id: {c}')
courseid = c
#d = getCourses(courseid)
#print("\tconclude on: %s" % d['end_at'])
data = { 'course[is_public_to_auth_users]': True, 'course[event]': 'offer' }
t = url + '/api/v1/courses/' + str(courseid)
r3 = requests.put(t, headers=header, params=data)
result = json.loads(r3.text)
if 'name' in result:
print(f"Name: {result['name']}")
if 'workflow_state' in result:
print(f" State: {result['workflow_state']}")
if 'is_public_to_auth_users' in result:
print(f" Public: {result['is_public_to_auth_users']}")
#x = input('enter to continue')
except Exception as e:
print('****%s' % str(e))
def create_sandboxes():
# ('ED','82'),
sandboxes = [ ('JH','45324'), ('PK','38183'), ('GM','5167'), ('BS','19231'),
('ST','303'), ('KW','5145')]
for (N,usrid) in sandboxes:
#names = input("what are the initials of people? Separate with spaces ").split()
coursename = f"{N} Sandbox SU23 Humanizing STEM"
coursecode = f"{N} SU23 Sandbox STEM"
print(f"Creating course: {coursename} for {N}, id: {usrid}")
u2 = url + "/api/v1/accounts/1/courses"
data = {
"course[name]": coursename,
"course[code]": coursecode,
"course[term_id]": "8",
}
#print(u2)
r3 = requests.post(u2, headers=header, params=data)
course_data = json.loads(r3.text)
id = course_data['id']
print(f"created course id {id}")
u3 = url + "/api/v1/courses/%i/enrollments" % id
#usrid = input("id of %s? " % N)
data2 = { "enrollment[type]":"TeacherEnrollment", "enrollment[user_id]":usrid,
"enrollment[enrollment_state]":"active" }
r4 = requests.post(u3, headers=header, params=data2)
#print(json.dumps(json.loads(r4.text),indent=2))
print()
x = input("enter to continue")
def course_term_summary_2():
@@ -1641,6 +1687,7 @@ if __name__ == "__main__":
15: ['List users who passed GOTT 1 / Bootcamp', get_gott1_passers],
16: ['List users who passed Plagiarism Module', get_plague_passers],
18: ['Create some sandbox courses', create_sandboxes],
19: ['make courses visible to auth users', modify_courses],
20: ['process the semester overview output (10)', course_term_summary_2],
22: ['Get a course info by id',getCourses],
23: ['Reset course conclude date',update_course_conclude],
@@ -1649,7 +1696,7 @@ if __name__ == "__main__":
32: ['Get course ext tools', get_course_ext_tools],
33: ['Add GavConnect to a course', do_gav_connect],
17: ['Remove "new analytics" from all courses navs in a semester', remove_n_analytics],
21: ['Add course evals', add_evals],
27: ['Fine tune term dates and winter session', course_dates_terms],
3: ['Cross-list classes', xlist ],
6: ['Cross list helper', eslCrosslister],
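
The sandbox loop above posts to /api/v1/accounts/1/courses and then to the new course's enrollments endpoint, parsing each response without checking it. A minimal sketch of the same two-step flow with basic response checking (the status checks and the helper name are additions for illustration, not part of the commit; url and header are the script's Canvas base URL and auth header):

import requests

def create_one_sandbox(url, header, name, code, term_id, teacher_id):
    # Sketch only: create a Canvas course, then enroll a teacher, failing loudly if either call errors.
    r = requests.post(url + "/api/v1/accounts/1/courses", headers=header,
                      params={"course[name]": name,
                              "course[code]": code,
                              "course[term_id]": term_id})
    if not r.ok:
        raise RuntimeError(f"course create failed: {r.status_code} {r.text[:200]}")
    course_id = r.json()["id"]
    r2 = requests.post(url + f"/api/v1/courses/{course_id}/enrollments", headers=header,
                       params={"enrollment[type]": "TeacherEnrollment",
                               "enrollment[user_id]": teacher_id,
                               "enrollment[enrollment_state]": "active"})
    if not r2.ok:
        raise RuntimeError(f"enrollment failed: {r2.status_code} {r2.text[:200]}")
    return course_id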

View File

@@ -16,6 +16,7 @@ from pipelines import sync_non_interactive, url, header, gp, dean
mycourses = {}
local_data_folder = 'cache/canvas_data/'
#sqlite_file = local_data_folder + 'data20230613.db' #'data_su20_4hr_blocks.db'
sqlite_file = local_data_folder + 'data.db' #'data_su20_4hr_blocks.db'
mylog = codecs.open(local_data_folder + 'canvas_data_log.txt','w')
@@ -1800,6 +1801,59 @@ def do_encoding():
for k,v in x.items():
print("\t",k,":",v)
def printer(x):
print(x)
def all_students_history(handler=printer, limit=1000):
qry = """SELECT
u.name AS user_name,
u.canvasid,
e.workflow AS workflow,
e.created,
e.updated,
c.name AS course_name,
t.name AS term_name
FROM
users u
JOIN
enrollment e ON u.id = e.user_id
JOIN
courses c ON e.course_id = c.id
JOIN
terms t ON c.termid = t.id
WHERE
e.type = 'StudentEnrollment'
ORDER BY
u.sortablename, e.created ASC;"""
connection = sqlite3.connect(sqlite_file)
connection.row_factory = dict_factory
cursor = connection.cursor()
cursor.execute(qry)
# Fetch the first row
line = cursor.fetchone()
i = 1
# Process rows one by one
while line is not None:
# Process the current row
handler(line)
i += 1
if i > limit: break
# Fetch the next row
line = cursor.fetchone()
# Close the cursor and connection
cursor.close()
connection.close()
if __name__ == "__main__":
@@ -1826,6 +1880,7 @@ if __name__ == "__main__":
19: ['Build DB schedule from json files', build_db_schedule],
20: ['Process enrollment data', process_enrollment_data],
21: ['Encode data', do_encoding],
22: ['all students course history', all_students_history],
#19: ['add evals for a whole semester', instructor_list_to_activate_evals],
#16: ['Upload new employees to flex app', employees_refresh_flex],
}
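
all_students_history above streams rows through the handler callback one at a time instead of materializing the whole result set; a small usage sketch (the counting handler below is hypothetical and not part of the commit):

from collections import defaultdict

per_student = defaultdict(int)

def count_rows(row):
    # row is the dict produced by dict_factory: user_name, canvasid, workflow, created, updated, course_name, term_name
    per_student[row['canvasid']] += 1

all_students_history(handler=count_rows, limit=500)
print(len(per_student), "students seen in the first 500 enrollment rows")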

273
semesters.py Normal file
View File

@@ -0,0 +1,273 @@
import json
standard = ['Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
'Fall 2023', 'Summer 2023', 'Spring 2023', 'Winter 2023',
'Fall 2022', 'Summer 2022', 'Spring 2022', 'Winter 2022',
'Fall 2021', 'Summer 2021', 'Spring 2021',
'Fall 2020', 'Summer 2020', 'Spring 2020', 'Winter 2020',
'Fall 2019', 'Summer 2019', 'Spring 2019', 'Winter 2019',
'Fall 2018', 'Summer 2018', 'Spring 2018',
'Fall 2017', 'Summer 2017', 'Spring 2017', ]
code = 'fa24,su24,sp24,wi24,fa23,su23,sp23,wi23,fa22,su22,sp22,wi22,fa21,su21,sp21,fa20,su20,sp20,wi20,fa19,su19,sp19,wi19,fa18,su18,sp18,fa17,su17,sp17'.split(',')
begin = ['08/26','06/10','01/29','01/02',
'08/28','06/12','01/30','01/03',
'08/22','06/13','01/31','01/04',
'08/23','06/14','02/01',
'08/24','06/15','01/27','01/02',
'08/26','06/17','01/28','01/02',
'08/27','06/18','01/29',
'08/28','06/19','01/30']
canvas_label = []
semester_list = {}
season_to_number = { 'Fall': '70', 'Summer': '50', 'Spring': '30', 'Winter': '10'}
for s in list(zip(standard,code,begin)):
season,year = s[0].split(' ')
cl = year + " " + season
sem_record = {'name': s[0], 'code': s[1], 'start': s[2] + '/' + s[1][-2:], 'number': year + season_to_number[s[0].split(' ')[0]]}
semester_list[s[0]] = sem_record
semester_list[s[1]] = sem_record
canvas_label.append(cl)
semester_list[cl] = sem_record
# print(json.dumps(semester_list,indent=2))
"""
semester_list = {
"Fall 2022": {
"name": "Fall 2022",
"code": "fa22",
"start": "08/22/22"
},
"fa22": {
"name": "Fall 2022",
"code": "fa22",
"start": "08/22/22"
},
"2022 Fall": {
"name": "Fall 2022",
"code": "fa22",
"start": "08/22/22"
},
"Summer 2022": {
"name": "Summer 2022",
"code": "su22",
"start": "06/13/22"
},
"su22": {
"name": "Summer 2022",
"code": "su22",
"start": "06/13/22"
},
"2022 Summer": {
"name": "Summer 2022",
"code": "su22",
"start": "06/13/22"
},
"Spring 2022": {
"name": "Spring 2022",
"code": "sp22",
"start": "01/31/22"
},
"sp22": {
"name": "Spring 2022",
"code": "sp22",
"start": "01/31/22"
},
"2022 Spring": {
"name": "Spring 2022",
"code": "sp22",
"start": "01/31/22"
},
"Fall 2021": {
"name": "Fall 2021",
"code": "fa21",
"start": "08/23/21"
},
"fa21": {
"name": "Fall 2021",
"code": "fa21",
"start": "08/23/21"
},
"2021 Fall": {
"name": "Fall 2021",
"code": "fa21",
"start": "08/23/21"
},
"Summer 2021": {
"name": "Summer 2021",
"code": "su21",
"start": "06/14/21"
},
"su21": {
"name": "Summer 2021",
"code": "su21",
"start": "06/14/21"
},
"2021 Summer": {
"name": "Summer 2021",
"code": "su21",
"start": "06/14/21"
},
"Spring 2021": {
"name": "Spring 2021",
"code": "sp21",
"start": "02/01/21"
},
"sp21": {
"name": "Spring 2021",
"code": "sp21",
"start": "02/01/21"
},
"2021 Spring": {
"name": "Spring 2021",
"code": "sp21",
"start": "02/01/21"
},
"Fall 2020": {
"name": "Fall 2020",
"code": "fa20",
"start": "08/24/20"
},
"fa20": {
"name": "Fall 2020",
"code": "fa20",
"start": "08/24/20"
},
"2020 Fall": {
"name": "Fall 2020",
"code": "fa20",
"start": "08/24/20"
},
"Summer 2020": {
"name": "Summer 2020",
"code": "su20",
"start": "06/15/20"
},
"su20": {
"name": "Summer 2020",
"code": "su20",
"start": "06/15/20"
},
"2020 Summer": {
"name": "Summer 2020",
"code": "su20",
"start": "06/15/20"
},
"Spring 2020": {
"name": "Spring 2020",
"code": "sp20",
"start": "01/27/20"
},
"sp20": {
"name": "Spring 2020",
"code": "sp20",
"start": "01/27/20"
},
"2020 Spring": {
"name": "Spring 2020",
"code": "sp20",
"start": "01/27/20"
},
"Fall 2019": {
"name": "Fall 2019",
"code": "fa19",
"start": "08/26/19"
},
"fa19": {
"name": "Fall 2019",
"code": "fa19",
"start": "08/26/19"
},
"2019 Fall": {
"name": "Fall 2019",
"code": "fa19",
"start": "08/26/19"
},
"Summer 2019": {
"name": "Summer 2019",
"code": "su19",
"start": "06/17/19"
},
"su19": {
"name": "Summer 2019",
"code": "su19",
"start": "06/17/19"
},
"2019 Summer": {
"name": "Summer 2019",
"code": "su19",
"start": "06/17/19"
},
"Spring 2019": {
"name": "Spring 2019",
"code": "sp19",
"start": "01/28/19"
},
"sp19": {
"name": "Spring 2019",
"code": "sp19",
"start": "01/28/19"
},
"2019 Spring": {
"name": "Spring 2019",
"code": "sp19",
"start": "01/28/19"
},
"Fall 2018": {
"name": "Fall 2018",
"code": "fa18",
"start": "08/27/18"
},
"fa18": {
"name": "Fall 2018",
"code": "fa18",
"start": "08/27/18"
},
"2018 Fall": {
"name": "Fall 2018",
"code": "fa18",
"start": "08/27/18"
},
"Summer 2018": {
"name": "Summer 2018",
"code": "su18",
"start": "06/18/18"
},
"su18": {
"name": "Summer 2018",
"code": "su18",
"start": "06/18/18"
},
"2018 Summer": {
"name": "Summer 2018",
"code": "su18",
"start": "06/18/18"
},
"Spring 2018": {
"name": "Spring 2018",
"code": "sp18",
"start": "01/29/18"
},
"sp18": {
"name": "Spring 2018",
"code": "sp18",
"start": "01/29/18"
},
"2018 Spring": {
"name": "Spring 2018",
"code": "sp18",
"start": "01/29/18"
}
}
"""

228
stats.py
View File

@@ -597,6 +597,20 @@ def grades_to_vectors(boolean=0, verbose=0):
def course_main_record():
return json.loads(codecs.open('cache/courses/course_main_record.json','r','utf-8').read())
def courses_to_vector_ordered(course_list):
# each course is (name, semester_order, score)
template = course_main_record()
lookup = {}
for i,c in enumerate(template):
lookup[c] = i
vector = ['0' for x in range(len(template))]
for course,order,score in course_list:
goodname = shell2course(course)
if goodname:
vector[lookup[goodname]] = str(order)
return vector
def courses_to_vector(course_list, boolean=1):
#print(course_list)
yesval = "true" if boolean else 1
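
courses_to_vector_ordered above writes each course's semester-order value into that course's fixed slot in the course_main_record template; a small illustrative call (the course names are placeholders and assume shell2course recognizes them; not part of the commit):

# hypothetical input: (shell name, semester index within the student's tenure, score)
course_list = [('CSIS45', 1, '92.0'), ('MATH5', 2, '88.5')]
vec = courses_to_vector_ordered(course_list)
# vec is a list of strings as long as course_main_record(), '0' everywhere except
# the CSIS45 and MATH5 slots, which hold '1' and '2' (the score field is unused here)
print(','.join(vec))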
@@ -658,6 +672,215 @@ def all_course_names():
mr.write(json.dumps(master_record,indent=2))
from semesters import semester_list, canvas_label
from semesters import code as semester_order
from localcache import all_students_history
from datetime import datetime, timedelta
def semester_dates():
#print()
for c in canvas_label:
print(semester_list[c])
length = 15
if semester_list[c]['code'][0:2] == 'su':
length = 5
start_date = semester_list[c]['start']
# Convert the date string to a datetime object
date_object = datetime.strptime(start_date, '%m/%d/%y')
start_fmt = date_object.strftime('%a %b %d, %Y')
# Add 15 weeks, 5 days to the date
new_date = date_object + timedelta(weeks=15)
new_date = new_date + timedelta(days=5)
# Format the new date as a string
new_date_string = new_date.strftime('%m/%d/%y')
end_fmt = new_date.strftime('%a %b %d, %Y')
# Print the new date
print(f"start: {start_fmt}, end: {end_fmt}")
current_student = ""
current_student_block = []
current_student_info = {'first':'', 'last':''}
normalized_blocks = []
ignore_courses = "El,zACCT20,GASPAR".split(",")
seen_courses = []
def course_line_process(line):
global current_student, current_student_block, seen_courses, normalized_blocks, current_student_info
sem = line['term_name']
m1 = re.search(r'^(\d\d\d\d)\s(\w+)$', sem)
if not m1: # is NOT an academic semester, skip
return
uid = line['canvasid']
if uid != current_student:
if current_student_block:
current_student_block.append(current_student_info)
normalized_blocks.append(current_student_block)
current_student_block = []
current_student_info = {'first':semester_list[sem]['code'], 'last':''}
current_student = uid
#print(f"Student: {uid} ({line['user_name']})")
# line is a dict
current_student_info['last'] = semester_list[sem]['code']
year, season = m1.group(1), m1.group(2)
date_format = "%Y-%m-%d %H:%M:%S.%f"
create_dt = datetime.strptime(line['created'], date_format)
update_dt = datetime.strptime(line['updated'], date_format)
sem_start = datetime.strptime(semester_list[sem]['start'], '%m/%d/%y')
course = line['course_name']
c_parts = course.split(' ')
if c_parts[0] in ignore_courses or c_parts[0] in seen_courses:
return
classname = shell2course(c_parts[0])
if not classname:
# print empty dict entry for initial setup
# print(f" \"{c_parts[0]}\": \"\",")
seen_courses.append(c_parts[0])
else:
#
flow = line['workflow']
mark = '+'
if flow == "deleted": mark = '-'
# normal start & finish, give add date
add_day = sem_start - create_dt
add_day = add_day.days
sign = '-'
if add_day < 0:
add_day = -add_day
sign = '+'
#print(f" {mark} {classname} added T{sign}{add_day} {semester_list[sem]['code']}")
temp_usr_name = re.sub(r',','',line['user_name'])
current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{semester_list[sem]['code']}")
if flow == "deleted":
# deleted, give delete date
del_day = sem_start - update_dt
del_day = del_day.days
sign = '-'
if del_day < 0:
del_day = -del_day
sign = '+'
#print(f" {mark} {classname} deleted T{sign}{del_day} {semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{semester_list[sem]['code']}")
def normalize_course_histories():
global normalized_blocks, current_student_block, current_student_info
all_students_history(course_line_process, limit=99910000)
current_student_block.append(current_student_info)
normalized_blocks.append(current_student_block)
codecs.open('cache/normalized_student_add_drop.json','w','utf-8').write(json.dumps(normalized_blocks,indent=2))
# let's see if we can get grades...
grades_by_student_course = defaultdict(dict)
print("Doing grades...")
with codecs.open('cache/courses_student_scores.csv','r','utf-8') as gradesfile:
for s in gradesfile:
parts = s.split(',')
stu = int(parts[0])
#print(stu)
for c in parts[1:]:
try:
#print(c)
crs,gra = c.split('|')
grades_by_student_course[stu][crs] = gra
except Exception as e:
pass
# go through again
print("Second pass of grades and student history...")
student_history = codecs.open('cache/normalized_student_history.csv','w','utf-8')
student_history.write("studentid,studentname,course,action,when,grade,sem_name,first_sem,last_sem,tenure_length,sem_index\n")
semester_order.reverse()
for blk in normalized_blocks:
info = blk[-1]
first = semester_order.index(info['first']) + 1
last = semester_order.index(info['last']) + 1
length = last - first + 1
for course in blk[:-1]:
parts = course.split(',')
#print(parts)
sem = parts[5]
sem_index = semester_order.index(sem) - first + 2
stu = int(parts[0])
crs = parts[2]
grade = ""
if stu in grades_by_student_course:
if crs in grades_by_student_course[stu]:
grade = grades_by_student_course[stu][crs]
student_history.write(",".join([parts[0], parts[1], parts[2], parts[3], parts[4], grade, parts[5], str(first), str(last), str(length), str(sem_index), ]) + '\n')
# make "unified records" or one line per student
student_history_2 = codecs.open('cache/normalized_student_history2.csv','w','utf-8')
allcourse = course_main_record()
#print(allcourse)
template = ['studentid', 'studentname', 'tenure_length']
template.extend(allcourse)
#print(template)
student_history_2.write( ",".join(template) + "\n" )
for blk in normalized_blocks:
student_block = []
info = blk[-1]
first = semester_order.index(info['first']) + 1
last = semester_order.index(info['last']) + 1
length = last - first + 1
temp_course_holder = {}
temp_course_grade_holder = {}
for course in blk[:-1]:
parts = course.split(',')
#print(parts)
sem = parts[5]
sem_index = semester_order.index(sem) - first + 2
stu = int(parts[0])
crs = parts[2]
if parts[3] == 'add':
temp_course_holder[crs] = sem_index
elif parts[3] == 'del' and crs in temp_course_holder:
del temp_course_holder[crs]
# now the temp_course_holder has the courses and semesters
for crs,sem_index in temp_course_holder.items():
grade = ""
if stu in grades_by_student_course:
if crs in grades_by_student_course[stu]:
grade = grades_by_student_course[stu][crs]
this_record = (crs, sem_index, grade)
student_block.append(this_record)
student_vector = [ parts[0], parts[1], str(length) ]
student_vector.extend(courses_to_vector_ordered(student_block))
student_history_2.write(",".join(student_vector) + '\n')
#print(student_vector)
def cluster_student_histories():
infile = 'cache/courses_student_scores.csv'
import pandas as pd
import matplotlib.pyplot as plt
from kneed import KneeLocator
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
df = pd.read_csv(infile)
@@ -669,7 +892,10 @@ if __name__ == "__main__":
5: ['test sem codes',codetest] ,
6: ['get student data from orientations', get_student_orientations],
7: ['manage course master list', all_course_names],
8: ['grades to vectors', grades_to_vectors],
9: ['semester startdates list', semester_dates],
10: ['normalize course histories', normalize_course_histories],
11: ['cluster student histories', cluster_student_histories],
}
print ('')
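
cluster_student_histories is truncated by the hunk boundary right after the CSV is loaded; its imports (KMeans, KneeLocator, silhouette_score, StandardScaler) suggest a standard elbow/silhouette search over k. A generic sketch of that pattern, assuming numeric feature columns — not the committed code:

import pandas as pd
from kneed import KneeLocator
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('cache/courses_student_scores.csv')     # path as in the diff
X = StandardScaler().fit_transform(df.select_dtypes('number'))

inertias, silhouettes = [], []
ks = list(range(2, 11))
for k in ks:
    km = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X)
    inertias.append(km.inertia_)
    silhouettes.append(silhouette_score(X, km.labels_))

elbow = KneeLocator(ks, inertias, curve='convex', direction='decreasing').elbow
best_k = ks[silhouettes.index(max(silhouettes))]
print(f"elbow at k={elbow}, best silhouette at k={best_k}")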