# statistics
"""
## Investigate: Success rates (grades) of students in:
- online courses (over all)
- sync and async and online live
- teachers/courses that have passed POCR (are all async?)
- teachers that have done more than the minimum training in online teaching
- in person classes, if grades are available

## Data collection
- Choose how many semesters (10?)
- Script 1
  - given a CRN and Semester, download all grades
  - Check if grades were used and make sense
  - Compute mean, % > 70, median, etc.
  - Anonymization steps
    - replace teacher names w/ id number
    - replace student names w/ id number
    - replace course names w/ course code
- Script 2
  - given all semester schedules, generate lists of:
    - CRNs which are online, online live, hybrid, inperson, excluded
    - CRNs in which teacher and course have passed POCR (and semester is greater than their pass date)
    - CRNs in which teacher passed POCR for a different course (and semester is greater than their pass date)
    - CRNs to exclude, for example SP20, because of COVID. Possibly SU20 and FA20
    - CRNs which are POCR approved
    - CRNs in which teacher has done more than the minimum training in online teaching
    - Student ids which have participated in the online orientation over a certain threshold
- Next steps: generate the cross-reference for what categories teachers are in, and integrate it into the main data file.
- Next steps (June/July 2023)
  - add campus, time of day, and sem_order (which semester in their college career they took it) columns
  - Organize rows by students
  - Develop a way to categorize them: by course set and/or score set (clustering: k-means, random forest, etc.)
- Goals
  - display and summarize clusters of students on a dashboard
  - ongoing categorization (implying course recommendations and interventions) based on it

## Hypothesis Testing
- (a first-pass sketch, comparing pass rates by POCR status, appears after process_grades() below)
"""

import codecs, os
import json, csv, requests, sys, re
from multiprocessing import Semaphore
from statistics import mean, median, stdev
from pipelines import fetch, url
from courses import getCoursesInTerm, course_enrollment
from localcache import get_course_enrollments
from collections import defaultdict

all_grades_file = "cache/grades_all.csv"
all_courses_file = "cache/course_grades_all.csv"
all_courses_file2 = "cache/course_grades_compact.csv"
all_courses_file3 = "cache/course_grades_full.csv"
all_courses_file4 = "cache/course_grades_full_bystudent.csv"
all_courses_file5 = "cache/courses_passed_bystudent.csv"
student_courses_scores = "cache/courses_student_scores.csv"
student_orientation_participation = 'cache/participation_orientation_courses.json'


def num(s):
    if s == '':
        return 0
    # strip a trailing ".0" so whole-number scores parse as ints
    s = re.sub(r'\.0$', '', s)
    try:
        return int(s)
    except ValueError:
        return float(s)


def sem_num_to_code(sem_num):
    p = re.search(r'^(\d\d\d\d)(\d\d)$', sem_num)
    if p:
        yr = p.group(1)[2:4]
        sem = p.group(2)
        lookup = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}
        return f"{lookup[sem]}{yr}"
    return ""


def sem_code_to_num(sem_code):
    # fa23
    p = re.search(r'^([a-z]{2})(\d\d)$', sem_code)
    if p:
        s = p.group(1)
        y = p.group(2)
        lookup = {'wi': '10', 'sp': '30', 'su': '50', 'fa': '70'}
        return f"20{y}{lookup[s]}"
    return ""


def codetest():
    sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
    codes = 'fa21 wi22 sp23 su23 fa23 wi24'.split(' ')
    for s in sems:
        print("{}: {}".format(s, sem_num_to_code(s)))
    for c in codes:
        print("{}: {}".format(c, sem_code_to_num(c)))
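
# Quick illustration of the two semester-code helpers above (codetest() prints
# a longer list). They round-trip between numeric term codes and the short
# codes used in the schedule cache:
#   sem_num_to_code('202370')  -> 'fa23'
#   sem_code_to_num('fa23')    -> '202370'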

def get_all():
    terms = '178 177 176 175 174 173 172 171 168 65 64 62 63 61 60 25 26 23 22 21'.split(' ')
    sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
    # Save grades to a CSV file
    with open(all_grades_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["crn", "sem", "coursecode", "s_can_id", "g", "name", "current", "final"])
        for (term, sem) in zip(terms, sems):
            print(term, sem, "\n")
            courses = getCoursesInTerm(term, get_fresh=0, show=0, active=1)
            for c in courses:
                print(c['name'])
                c_code = c['course_code']
                grades(writer, sem, c['id'], c_code)
            csvfile.flush()


def grades(writer, sem, COURSE_ID, course_code):
    params = {
        "include[]": ["enrollments", "current_grading_period_scores"]
    }
    grades = fetch(url + f"/api/v1/courses/{COURSE_ID}/users", 0, params)
    #grades = json.loads(grades.text)
    for student in grades:
        try:
            id = student["id"]
            name = student["name"]
            g = student["login_id"]
            print("\t", name)
            if student['enrollments'][0]['type'] == 'StudentEnrollment':
                grade = student["enrollments"][0]["grades"]["final_score"]
                current = student["enrollments"][0]["grades"]["current_score"]
                writer.writerow([COURSE_ID, sem, course_code, id, g, name, current, grade])
        except Exception as e:
            print("Exception:", e)


def get_student_orientations():
    courses = {'iLearn Student Orientation 2022': '9768',              # 8170 students
               'Kickstart Online Orientation - Transfer': '36',        # 6149
               'Kickstart Online Orientation - New to College': '35',  # 5392
               'LIB732 SP18': '3295',                                  # 2193
               'LIB732 FA17': '2037',                                  # 1868
               'LIB732 SP17': '69',                                    # 1645
               'Kickstart Online Orientation - Returning': '37',       # 1463
               'iLearn Student Orientation 2023': '15924',             # 1292
               'LIB732 SU17': '1439',                                  # 1281
               }
    views_bycourse = {}
    all_student_ids = set()
    # get pageviews of each orientation course
    for c, i in courses.items():
        print(c)
        cache_file_name = f'cache/participation_course_{i}.json'
        student_ids = [x[1] for x in get_course_enrollments(i)]
        all_student_ids.update(student_ids)
        if os.path.exists(cache_file_name):
            pv = json.loads(codecs.open(cache_file_name, 'r', 'utf-8').read())
        else:
            pv = get_student_page_views(i, student_ids)
            codecs.open(cache_file_name, 'w', 'utf-8').write(json.dumps(pv, indent=2))
        views_bycourse[i] = pv
    # add up pageviews for each student
    views_bystudent = {}
    for student_id in all_student_ids:
        views_bystudent[student_id] = sum([views_bycourse[i].get(student_id, 0) for i in courses.values()])
    codecs.open(student_orientation_participation, 'w', 'utf-8').write(json.dumps(views_bystudent, indent=2))


def get_student_page_views(course_id, student_ids):
    page_views = {}
    verbose = 0
    for student_id in student_ids:
        a = f'/api/v1/courses/{course_id}/analytics/users/{student_id}/activity'
        response = fetch(url + a, verbose)
        page_views[student_id] = sum(response.get('page_views', {}).values())
        if verbose:
            print(page_views)
    return page_views


schedules = {}
orientations = {}


def load_schedules():
    global schedules
    if not schedules:
        for f in os.listdir('cache/schedule'):
            m = re.search(r'(\w\w\d\d)_sched_expanded\.json', f)
            if m:
                sem = m.group(1)
                schedules[sem] = json.loads(codecs.open('cache/schedule/' + f, 'r', 'utf-8').read())


def load_orientations():
    global orientations
    if not orientations:
        orientations = json.loads(codecs.open(student_orientation_participation, 'r', 'utf-8').read())
    return orientations
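
# The project notes above call for flagging students whose orientation
# participation is "over a certain threshold" of page views. This is a hedged
# sketch of that step; the helper name and the default threshold of 25 page
# views are assumptions, not part of the original pipeline.
def orientation_participants(min_page_views=25):
    """Return the set of student ids whose summed orientation page views
    (see get_student_orientations above) meet or exceed min_page_views."""
    views = load_orientations()
    return {sid for sid, count in views.items() if count >= min_page_views}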

def to_crn_fallback(name):
    #print(name)
    name = name.lower()
    try:
        m1 = re.search(r'(\d\d\d\d\d)', name)
        if m1:
            crn = m1.group(1)
        else:
            return None, None
        m2 = re.search(r'([wispufa][wispufa]\d\d)', name.lower())
        if m2:
            sem = m2.group(1)
        else:
            return None, None
        #print(name, crn, sem)
        return crn, sem
    except Exception as e:
        #print("Exception: ", e, name)
        return None, None


def ilearn_name_to_course_code(iname):
    parts = iname.split(' ')
    code = parts[0]
    return code


def short_name_to_crn(name):
    #print(name)
    try:
        parts = name.split(' ')
        code = parts[0]
        sem = parts[1]
        crn = parts[2]
        m_sem = re.search(r'^(\w\w\d\d)$', sem)
        if not m_sem:
            return to_crn_fallback(name)
        m = re.search(r'^(\d\d\d\d\d)$', crn)
        if m:
            return crn, sem
        else:
            crn_parts = crn.split('/')
            m = re.search(r'^(\d\d\d\d\d)$', crn_parts[0])
            if m:
                return crn_parts[0], sem
        #print("non standard course short name: ", code, sem, crn)
        return to_crn_fallback(name)
    except Exception as e:
        #print("Exception: ", e, name)
        return to_crn_fallback(name)


def fixname(n):
    return re.sub(r'\s+', ' ', n).strip()


def short_name_to_teacher_type_crn_sem(name):
    load_schedules()
    crn, sem = short_name_to_crn(name)
    try:
        if sem:
            sem = sem.lower()
            if sem[0:2] == 'wi':
                sem = 'sp' + sem[2:]
            for course in schedules[sem]:
                if course['crn'] == crn:
                    return fixname(course['teacher']), course['type'], crn, sem
    except Exception as e:
        return None, None, None, None
    return None, None, None, None


pocrs = {}


def load_pocrs():
    global pocrs
    if not pocrs:
        with open('cache/pocr_passed.csv') as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader)
            for row in csvreader:
                pocrs[row[0] + " " + row[1]] = row[2]
    return pocrs


def lookup_pocr(teacher, course, sem):
    p = load_pocrs()
    pcode = teacher + " " + course
    if pcode in p:
        sem_passed = sem_code_to_num(p[pcode])
        sem_test = sem_code_to_num(sem)
        if sem_passed < sem_test:
            return True
    return False


def nametest():
    with open(all_courses_file) as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)
        for row in csvreader:
            print(row[0], "-", short_name_to_teacher_type_crn_sem(row[0]))
            next(csvreader)  # note: this skips every other row of the file


def above_70(li, maximum):
    cutoff = 0.7 * maximum
    above = list(filter(lambda x: x >= cutoff, li))
    return round(len(above) / len(li), 3)
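
# Illustrative only:
#   above_70([90, 72, 65, 40], 100) -> 0.5
# i.e. the fraction of scores at or above 70% of the given maximum.
# lookup_pocr(teacher, course, sem) returns True only when that teacher/course
# pair appears in cache/pocr_passed.csv AND the pass semester precedes the
# semester being examined, e.g. (names here are hypothetical):
#   lookup_pocr('Jane Doe', 'MATH101', 'fa23')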

# v1, does a row of averages for each course
def process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code):
    fxns = [mean, median, stdev, min, max, len]
    c_id = block[0][0]
    sem = block[0][1]
    course_code = block[0][2]
    cur_scores = [num(x[6]) for x in block]
    final_scores = [num(x[7]) for x in block]
    teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
    if not teacher:
        return
    tch_code = teacher_to_code[teacher]
    crs_code = course_to_code[course_code]
    if len(final_scores) < 2:
        return
    try:
        (cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns]
        (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
        cur_pct_passed = above_70(cur_scores, cur_max)
        final_pct_passed = above_70(final_scores, final_max)
        if final_max == 0:
            return
        scaled_final_scores = [x / final_max for x in final_scores]
        (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores), 2) for f in fxns]
        good_code = ilearn_name_to_course_code(course_code)
        pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0
        output.writerow([crs_code, good_code, pocr, tch_code, mode, final_pct_passed,
                         scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count])
        out_c.writerow([crs_code, good_code, pocr, tch_code, mode, final_pct_passed,
                        scl_mean, scl_median, scl_stdev, final_count])
    except Exception as e:
        print("Exception:", e)


# v2, one line per student/course
def process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code):
    fxns = [mean, median, stdev, min, max, len]
    c_id = block[0][0]
    sem = block[0][1]
    course_code = block[0][2]
    cur_scores = [num(x[6]) for x in block]
    final_scores = [num(x[7]) for x in block]
    teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
    if not teacher:
        return
    tch_code = teacher_to_code[teacher]
    crs_code = course_to_code[course_code]
    if len(final_scores) < 2:
        return
    try:
        # "course_code course pocr_status orientation_status teacher_code mode student_id scaled_score"
        (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
        final_pct_passed = above_70(final_scores, final_max)
        if final_max == 0:
            return
        scaled_final_scores = [x / final_max for x in final_scores]
        (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores), 2) for f in fxns]
        good_code = ilearn_name_to_course_code(course_code)
        pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0
        o = load_orientations()
        for row in block:
            student_id = row[3]
            orientation = o[student_id] if student_id in o else 0
            scaled_score = round(num(row[7]) / final_max, 2)
            out_f.writerow([crs_code, good_code, pocr, orientation, tch_code, mode, student_id, scaled_score])
        print(course_code)
    except Exception as e:
        print("Exception:", e)


def process_grades():
    # first loop to get all names
    courses_labeled = {}
    teacher_to_code = {}
    code_to_teacher = {}
    course_to_code = {}
    code_to_course = {}
    index = 1001
    crs_index = 4001
    with open(all_grades_file, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)
        for row in csvreader:
            crn_sem = row[0] + '_' + row[1]
            if not crn_sem in courses_labeled:
                teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(row[2])
                courses_labeled[crn_sem] = teacher
                if not row[2] in course_to_code:
                    course_to_code[row[2]] = crs_index
                    code_to_course[crs_index] = row[2]
                    crs_index += 1
                if teacher:
                    if not teacher in teacher_to_code:
                        teacher_to_code[teacher] = index
                        code_to_teacher[index] = teacher
                        index += 1
    codecs.open('cache/teacher_lookup_codes.json', 'w', 'utf-8').write(json.dumps([teacher_to_code, code_to_teacher], indent=2))
    codecs.open('cache/course_lookup_codes.json', 'w', 'utf-8').write(json.dumps([course_to_code, code_to_course], indent=2))

    out_fullrows = codecs.open(all_courses_file3, 'w', 'utf-8')
    out_f = csv.writer(out_fullrows)
    out_f.writerow("course_code course pocr_status orientation_status teacher_code mode student_id scaled_score".split(" "))
    out_compact = codecs.open(all_courses_file2, 'w', 'utf-8')
    out_c = csv.writer(out_compact)
    out_c.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev count".split(" "))

    with open(all_courses_file, "w", newline="") as output_f:
        output = csv.writer(output_f)
        output.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev scl_min scl_max count".split(" "))
        with open(all_grades_file, newline="") as csvfile:
            csvreader = csv.reader(csvfile)
            block = []
            current_index = None
            next(csvreader)
            for row in csvreader:
                index = row[0]
                if index != current_index:
                    if block:
                        process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code)
                        process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
                    block = []
                    current_index = index
                block.append(row)
            if block:
                process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code)
                process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
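
# The "Hypothesis Testing" section of the plan above is still empty. One
# possible first pass, sketched here under the assumption that pandas and
# scipy are available (scipy is not used elsewhere in this file, and the
# function name is an assumption), is a two-sample t-test on per-course pass
# rates split by POCR status, read from the compact file written by
# process_grades():
def compare_pocr_pass_rates():
    import pandas as pd
    from scipy.stats import ttest_ind
    df = pd.read_csv(all_courses_file2)
    pocr = df[df['pocr_status'] == 1]['percent_passed']
    non = df[df['pocr_status'] == 0]['percent_passed']
    stat, p = ttest_ind(pocr, non, equal_var=False)
    print(f"POCR mean={pocr.mean():.3f}  non-POCR mean={non.mean():.3f}  p={p:.4f}")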

def reorganize_grades_student():
    with open(all_courses_file3, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        bystudent = defaultdict(list)
        next(csvreader)
        for row in csvreader:
            st = row[6]
            bystudent[st].append(row)
    students = sorted(bystudent.keys())
    with open(all_courses_file4, "w", newline="") as output_f:
        with open(all_courses_file5, "w", newline="") as output_s:
            with open(student_courses_scores, 'w') as output_scs:
                output_s.write("student,courses\n")
                output = csv.writer(output_f)
                output.writerow("course_code course pocr_status orientation_status teacher_code mode student_id scaled_score".split(" "))
                # student id 0 has no courses
                output.writerow([0, ])
                for st in students:
                    courses = [r[1] for r in bystudent[st]]
                    scores = [r[7] for r in bystudent[st]]
                    zipped = zip(courses, scores)
                    output_scs.write(st + ",")
                    for c, s in zipped:
                        output_scs.write(f"{c}|{s},")
                    output_scs.write("\n")
                    output_s.write(st + "," + " ".join(courses) + "\n")
                    for row in bystudent[st]:
                        output.writerow(row)


def all_course_names_setup():
    cc = json.loads(codecs.open('cache/courses/courses_built.json', 'r', 'utf-8').read())
    courses = {}
    for C in cc.values():
        name = C['dept'] + C['number']
        #print(name)
        courses[name] = C
    #co = codecs.open('cache/courses/names.json','w','utf-8')
    #for c in sorted(courses.keys()):
    #    co.write(c + "\n")
    cr = codecs.open('cache/courses/names.json', 'r', 'utf-8')
    from_data = codecs.open('cache/courses_student_scores.csv', 'r', 'utf-8').readlines()
    unknown = {}
    for line in from_data:
        parts = line.split(',')
        stu_id = parts[0]
        ea = parts[1:]
        for C in ea:
            each = C.split('|')
            name = each[0]
            if not name in courses:
                unknown[name] = name
                #data_courses[each[0]] += 1
    for c in sorted(unknown.keys()):
        print(c)
    #co.write( json.dumps( {'unknown':unknown, 'coursenames':courses}, indent=2 ))


lookup = {}
names = {}


def shell2course(shell):
    global lookup, names
    if not lookup:
        cr = json.loads(codecs.open('cache/courses/names.json', 'r', 'utf-8').read())
        lookup = cr['unknown']
        allcourses = cr['coursenames']
        names = allcourses.keys()
    if shell in names:
        return shell
    if shell in lookup:
        c = lookup[shell]
        if c in names:
            return c
    #print(f"Can't find course: {shell}")
    return ""


def stu_record_line(line):
    line = line.strip()
    line = line.strip(',')
    parts = line.split(',')
    stu_id = parts[0]
    courses = []
    for C in parts[1:]:
        courses.append(C.split('|'))
    return stu_id, courses


def stu_record_to_vector(line, boolean=0):
    id, courses = stu_record_line(line)
    yesval = "true" if boolean else 1
    noval = "false" if boolean else 0
    template = json.loads(codecs.open('cache/courses/course_main_record.json', 'r', 'utf-8').read())
    lookup = {}
    for i, c in enumerate(template):
        lookup[c] = i
    vector = [noval for x in range(len(template))]
    for C in courses:
        goodname = shell2course(C[0])
        if goodname:
            vector[lookup[goodname]] = yesval  # C[1] # score
    return id, vector, courses


def grades_to_vectors(boolean=0, verbose=0):
    grades = codecs.open('cache/courses_student_scores.csv', 'r', 'utf-8').readlines()
    for L in grades:
        id, vector, courses = stu_record_to_vector(L, boolean)
        if verbose:
            print(id, vector)
        yield id, vector, courses
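
# The helpers above parse lines of cache/courses_student_scores.csv, which
# reorganize_grades_student() writes as "studentid,COURSE|score,COURSE|score,...".
# Illustrative only (the ids and course codes below are hypothetical):
#   stu_record_line('12345,MATH101|0.85,ENGL100|0.92,')
#     -> ('12345', [['MATH101', '0.85'], ['ENGL100', '0.92']])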

def course_main_record():
    return json.loads(codecs.open('cache/courses/course_main_record.json', 'r', 'utf-8').read())


def courses_to_vector_ordered(course_list):
    # each course is (name, semester_order, score)
    template = course_main_record()
    lookup = {}
    for i, c in enumerate(template):
        lookup[c] = i
    vector = ['0' for x in range(len(template))]
    for course, order, score in course_list:
        goodname = shell2course(course)
        if goodname:
            vector[lookup[goodname]] = str(order)
    return vector


def courses_to_vector(course_list, boolean=1):
    #print(course_list)
    yesval = "true" if boolean else 1
    noval = "false" if boolean else 0
    template = course_main_record()
    lookup = {}
    for i, c in enumerate(template):
        lookup[c] = i
    vector = [noval for x in range(len(template))]
    for C in course_list:
        C = C.strip()
        #goodname = shell2course(C[0])
        #if goodname:
        #print(C)
        vector[lookup[C]] = yesval  # C[1] # score
    #print(vector)
    return vector


def course_vector_to_names(vector):
    template = course_main_record()
    names = []
    for i, v in enumerate(vector):
        if v:
            names.append(template[i])
    return names
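
# Illustrative round trip (the course names are hypothetical and would need to
# exist in cache/courses/course_main_record.json): with boolean=0 the vector
# holds 0/1, so course_vector_to_names() recovers exactly the input courses,
# in template order.
#   vec = courses_to_vector(['MATH101', 'ENGL100'], boolean=0)
#   course_vector_to_names(vec)   # -> e.g. ['ENGL100', 'MATH101']
# Caveat: with the default boolean=1 the cells hold the strings "true"/"false",
# both of which are truthy, so course_vector_to_names() would return every course.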

def all_course_names():
    ac = json.loads(codecs.open('cache/courses/courses_built.json', 'r', 'utf-8').read())
    master_record = []
    for C in ac.values():
        if C['status'] == 'Draft':
            continue
        name = C['dept'] + C['number']
        master_record.append(name)
    master_record = set(master_record)
    master_record = list(master_record)
    master_record = sorted(master_record)

    ## Extract from all 'accomplished courses'...
    if 0:
        complete_list = {}
        missing_names = {}
        with open(student_courses_scores, 'r') as input_f:
            for L in input_f:
                stu_id, courses = stu_record_line(L)
                for C in courses:
                    real_name = shell2course(C[0])
                    if real_name:
                        complete_list[real_name] = 1
                    else:
                        missing_names[C[0]] = 1
        master_record = sorted(complete_list.keys())
        print(f"Found {len(master_record)} courses")
        print(master_record)
        print(f"Missing {len(missing_names)} courses")
        print(missing_names)

    mr = codecs.open('cache/courses/course_main_record.json', 'w', 'utf-8')
    mr.write(json.dumps(master_record, indent=2))


from semesters import semester_list, canvas_label
from semesters import code as semester_order
from localcache import all_students_history
from datetime import datetime, timedelta


def semester_dates():
    #print()
    for c in canvas_label:
        print(semester_list[c])
        length = 15
        if semester_list[c]['code'][0:2] == 'su':
            length = 5
        start_date = semester_list[c]['start']
        # Convert the date string to a datetime object
        date_object = datetime.strptime(start_date, '%m/%d/%y')
        start_fmt = date_object.strftime('%a %b %d, %Y')
        # Add the semester length in weeks (15, or 5 for summer) plus 5 days
        new_date = date_object + timedelta(weeks=length)
        new_date = new_date + timedelta(days=5)
        # Format the new date as a string
        new_date_string = new_date.strftime('%m/%d/%y')
        end_fmt = new_date.strftime('%a %b %d, %Y')
        # Print the new date
        print(f"start: {start_fmt}, end: {end_fmt}")


current_student = ""
current_student_block = []
current_student_info = {'first': '', 'last': ''}
normalized_blocks = []
ignore_courses = "El,zACCT20,GASPAR".split(",")
seen_courses = []


def course_line_process(line):
    global current_student, current_student_block, seen_courses, normalized_blocks, current_student_info
    sem = line['term_name']
    m1 = re.search(r'^(\d\d\d\d)\s(\w+)$', sem)
    if not m1:
        # is NOT an academic semester, skip
        return
    uid = line['canvasid']
    if uid != current_student:
        if current_student_block:
            current_student_block.append(current_student_info)
            normalized_blocks.append(current_student_block)
        current_student_block = []
        current_student_info = {'first': semester_list[sem]['code'], 'last': ''}
        current_student = uid
        #print(f"Student: {uid} ({line['user_name']})")
    # line is a dict
    current_student_info['last'] = semester_list[sem]['code']
    year, season = m1.group(1), m1.group(2)
    date_format = "%Y-%m-%d %H:%M:%S.%f"
    create_dt = datetime.strptime(line['created'], date_format)
    update_dt = datetime.strptime(line['updated'], date_format)
    sem_start = datetime.strptime(semester_list[sem]['start'], '%m/%d/%y')
    course = line['course_name']
    c_parts = course.split(' ')
    if c_parts[0] in ignore_courses or c_parts[0] in seen_courses:
        return
    classname = shell2course(c_parts[0])
    if not classname:
        # print empty dict entry for initial setup
        # print(f" \"{c_parts[0]}\": \"\",")
        seen_courses.append(c_parts[0])
    else:
        flow = line['workflow']
        mark = '+'
        if flow == "deleted":
            mark = '-'
        # normal start & finish, give add date
        add_day = sem_start - create_dt
        add_day = add_day.days
        sign = '-'
        if add_day < 0:
            add_day = -add_day
            sign = '+'
        #print(f" {mark} {classname} added T{sign}{add_day} {semester_list[sem]['code']}")
        temp_usr_name = re.sub(r',', '', line['user_name'])
        current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{semester_list[sem]['code']}")
        if flow == "deleted":
            # deleted, give delete date
            del_day = sem_start - update_dt
            del_day = del_day.days
            sign = '-'
            if del_day < 0:
                del_day = -del_day
                sign = '+'
            #print(f" {mark} {classname} deleted T{sign}{del_day} {semester_list[sem]['code']}")
            current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{semester_list[sem]['code']}")


def normalize_course_histories():
    global normalized_blocks, current_student_block, current_student_info
    all_students_history(course_line_process, limit=99910000)
    current_student_block.append(current_student_info)
    normalized_blocks.append(current_student_block)
    codecs.open('cache/normalized_student_add_drop.json', 'w', 'utf-8').write(json.dumps(normalized_blocks, indent=2))

    # let's see if we can get grades...
    grades_by_student_course = defaultdict(dict)
    print("Doing grades...")
    with codecs.open('cache/courses_student_scores.csv', 'r', 'utf-8') as gradesfile:
        for s in gradesfile:
            parts = s.split(',')
            stu = int(parts[0])
            #print(stu)
            for c in parts[1:]:
                try:
                    #print(c)
                    crs, gra = c.split('|')
                    grades_by_student_course[stu][crs] = gra
                except Exception as e:
                    pass

    # go through again
    print("Second pass of grades and student history...")
    student_history = codecs.open('cache/normalized_student_history.csv', 'w', 'utf-8')
    student_history.write("studentid,studentname,course,action,when,grade,sem_name,first_sem,last_sem,tenure_length,sem_index\n")
    semester_order.reverse()
    for blk in normalized_blocks:
        info = blk[-1]
        first = semester_order.index(info['first']) + 1
        last = semester_order.index(info['last']) + 1
        length = last - first + 1
        for course in blk[:-1]:
            parts = course.split(',')
            #print(parts)
            sem = parts[5]
            sem_index = semester_order.index(sem) - first + 2
            stu = int(parts[0])
            crs = parts[2]
            grade = ""
            if stu in grades_by_student_course:
                if crs in grades_by_student_course[stu]:
                    grade = grades_by_student_course[stu][crs]
            student_history.write(",".join([parts[0], parts[1], parts[2], parts[3], parts[4], grade,
                                            parts[5], str(first), str(last), str(length), str(sem_index),
                                            ]) + '\n')

    # make "unified records" or one line per student
    student_history_2 = codecs.open('cache/normalized_student_history2.csv', 'w', 'utf-8')
    allcourse = course_main_record()
    #print(allcourse)
    template = ['studentid', 'studentname', 'tenure_length']
    template.extend(allcourse)
    #print(template)
    student_history_2.write(",".join(template) + "\n")
    for blk in normalized_blocks:
        student_block = []
        info = blk[-1]
        first = semester_order.index(info['first']) + 1
        last = semester_order.index(info['last']) + 1
        length = last - first + 1
        temp_course_holder = {}
        temp_course_grade_holder = {}
        for course in blk[:-1]:
            parts = course.split(',')
            #print(parts)
            sem = parts[5]
            sem_index = semester_order.index(sem) - first + 2
            stu = int(parts[0])
            crs = parts[2]
            if parts[3] == 'add':
                temp_course_holder[crs] = sem_index
            elif parts[3] == 'del' and crs in temp_course_holder:
                del temp_course_holder[crs]
        # now the temp_course_holder has the courses and semesters
        for crs, sem_index in temp_course_holder.items():
            grade = ""
            if stu in grades_by_student_course:
                if crs in grades_by_student_course[stu]:
                    grade = grades_by_student_course[stu][crs]
            this_record = (crs, sem_index, grade)
            student_block.append(this_record)
        # note: parts[] here is whatever row was parsed last for this student
        student_vector = [parts[0], parts[1], str(length)]
        student_vector.extend(courses_to_vector_ordered(student_block))
        student_history_2.write(",".join(student_vector) + '\n')
        #print(student_vector)


def cluster_student_histories():
    infile = 'cache/courses_student_scores.csv'
    import pandas as pd
    import matplotlib.pyplot as plt
    from kneed import KneeLocator
    from sklearn.datasets import make_blobs
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score
    from sklearn.preprocessing import StandardScaler
    df = pd.read_csv(infile)
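
# The project notes call for clustering students by course set and/or score
# set, and cluster_student_histories() above currently stops after loading the
# CSV. Below is a hedged sketch of one possible next step (the function name,
# the k range, and the use of the boolean course vectors are assumptions, not
# the project's settled approach): build the 0/1 course matrix from
# grades_to_vectors() and pick k by silhouette score.
def cluster_students_kmeans_sketch(k_values=range(2, 10)):
    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score
    ids, rows = [], []
    for sid, vector, _courses in grades_to_vectors(boolean=0):
        ids.append(sid)
        rows.append(vector)
    X = np.array(rows)
    best = None
    for k in k_values:
        km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
        score = silhouette_score(X, km.labels_)
        print(f"k={k} silhouette={score:.3f}")
        if best is None or score > best[0]:
            best = (score, k, km.labels_)
    return ids, best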

if __name__ == "__main__":
    options = {
        1: ['get all historical grades from ilearn', get_all],
        2: ['process grades csv file', process_grades],
        3: ['reorganize full grades file by student', reorganize_grades_student],
        4: ['test shortname parse', nametest],
        5: ['test sem codes', codetest],
        6: ['get student data from orientations', get_student_orientations],
        7: ['manage course master list', all_course_names],
        8: ['grades to vectors', grades_to_vectors],
        9: ['semester startdates list', semester_dates],
        10: ['normalize course histories', normalize_course_histories],
        11: ['cluster student histories', cluster_student_histories],
    }
    print('')
    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the function in the options dict
    options[int(resp)][1]()
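
# Typical usage (the script name is whatever this file is saved as): run with
# no arguments to get the numbered menu above, or pass an option number
# directly, e.g. `python <script>.py 2` to rebuild the per-course grade
# summaries.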