Review notes (text before the first "diff --git" line is ignored by git apply and patch).

The copy of this patch under review had all newlines and indentation collapsed.
The line structure below is a reconstruction: 4-space indentation, the position
of blank context lines, and the spacing inside string literals are assumptions.
TODO confirm against stats.py; if context fails to match, try
"git apply --ignore-whitespace" or "patch -l".

Changes requested in review and folded into the added lines (hunk headers recounted):
  * sem_num_to_code / sem_code_to_num return "" for an unknown term suffix or
    season instead of raising KeyError.
  * lookup_pocr no longer reports True when a semester code cannot be parsed
    ("" compares lower than every real term string).
  * load_pocrs opens the CSV with newline='' and tolerates an empty file or short rows.
  * process_grades closes the two JSON lookup files, registers every course
    name it sees, and only registers a teacher right after parsing it.

diff --git a/stats.py b/stats.py
index 5be4c35..df6edea 100644
--- a/stats.py
+++ b/stats.py
@@ -51,6 +51,47 @@ from collections import defaultdict
 all_grades_file = f"cache/grades_all.csv"
 all_courses_file = f"cache/course_grades_all.csv"
 
+def sem_num_to_code(sem_num):
+    """Convert a numeric term such as '202370' to a short code such as 'fa23'.
+
+    Returns "" when sem_num is not six digits or its suffix is not 10/30/50/70.
+    """
+    p = re.search(r'^(\d\d\d\d)(\d\d)$', sem_num)
+    if p:
+        yr = p.group(1)[2:4]
+        sem = p.group(2)
+        lookup = {'10':'wi','30':'sp', '50':'su', '70':'fa'}
+        # unknown suffixes (e.g. '20') return "" instead of raising KeyError
+        if sem in lookup:
+            return f"{lookup[sem]}{yr}"
+    return ""
+
+def sem_code_to_num(sem_code): # fa23
+    """Convert a short code such as 'fa23' to a numeric term such as '202370'.
+
+    Returns "" for anything that is not a known season plus two digits.
+    The century is fixed at 20xx.
+    """
+    p = re.search(r'^([a-z]{2})(\d\d)$', sem_code)
+    if p:
+        s = p.group(1)
+        y = p.group(2)
+        lookup = {'wi':'10','sp':'30', 'su':'50', 'fa':'70'}
+        # unknown seasons (e.g. 'xx23') return "" instead of raising KeyError
+        if s in lookup:
+            return f"20{y}{lookup[s]}"
+    return ""
+
+def codetest():
+    """Print the conversion of sample terms in both directions."""
+    sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
+    codes = 'fa21 wi22 sp23 su23 fa23 wi24'.split(' ')
+    for s in sems:
+        print("{}: {}".format(s, sem_num_to_code(s)))
+
+    for c in codes:
+        print("{}: {}".format(c, sem_code_to_num(c)))
+
 def get_all():
     terms = '178 177 176 175 174 173 172 171 168 65 64 62 63 61 60 25 26 23 22 21'.split(' ')
     sems = '202330 202310 202270 202250 202230 202210 202170 202150 202130 202070 202050 202030 202010 201970 201950 201930 201910 201870 201850 201830'.split(' ')
@@ -120,7 +161,11 @@ def to_crn_fallback(name):
         #print("Exception: ", e, name)
         return None, None
 
-
+def ilearn_name_to_course_code(iname):
+    """Return the first space-separated token of an iLearn course name."""
+    parts = iname.split(' ')
+    code = parts[0]
+    return code
 
 def short_name_to_crn(name):
     #print(name)
@@ -168,7 +213,40 @@ def short_name_to_teacher_type_crn_sem(name):
 
     return None, None, None, None
 
+pocrs = {}
 
+def load_pocrs():
+    """Load cache/pocr_passed.csv on first use and return the cached dict.
+
+    Keys are the first two columns joined by a space; values are the third.
+    """
+    global pocrs
+    if not pocrs:
+        with open('cache/pocr_passed.csv', newline='') as csvfile:
+            csvreader = csv.reader(csvfile)
+            next(csvreader, None)
+            for row in csvreader:
+                if len(row) < 3:
+                    continue
+                pocrs[row[0] + " " + row[1]] = row[2]
+    return pocrs
+
+def lookup_pocr(teacher,course,sem):
+    """Return True if the recorded semester for teacher+course is before sem.
+
+    sem is a short code such as 'fa23'. Unknown pairs and semester codes that
+    sem_code_to_num cannot parse give False.
+    """
+    p = load_pocrs()
+    pcode = teacher + " " + course
+    if pcode in p:
+        sem_passed = sem_code_to_num(p[pcode])
+        sem_test = sem_code_to_num(sem)
+        # sem_code_to_num returns "" for unparseable codes, and "" sorts
+        # before every real term, so require both sides to be valid.
+        if sem_passed and sem_test and sem_passed < sem_test:
+            return True
+    return False
 
 def nametest():
     with open(all_courses_file) as csvfile:
@@ -182,21 +260,22 @@ def nametest():
 def above_70(li,maximum):
     cutoff = 0.7 * maximum
     above = list(filter(lambda x: x >= cutoff, li))
-    return (len(above)/len(li))
+    return round(len(above)/len(li), 3)
 
-def process_one_course_grades(block, output):
+def process_one_course_grades(block, output, teacher_to_code, course_to_code):
     fxns = [mean, median, stdev, min, max, len]
     c_id = block[0][0]
     sem = block[0][1]
     course_code = block[0][2]
     cur_scores = [num(x[6]) for x in block]
     final_scores = [num(x[7]) for x in block]
-    print(course_code)
     teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code)
     if not teacher:
         return
-    #print(cur_scores)
-    #print(final_scores)
+    tch_code = teacher_to_code[teacher]
+    crs_code = course_to_code[course_code]
+    if len(final_scores) < 2:
+        return
     try:
         (cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns]
         (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns]
@@ -204,19 +283,67 @@ def process_one_course_grades(block, output):
         cur_pct_passed = above_70(cur_scores, cur_max)
         final_pct_passed = above_70(final_scores, final_max)
 
-        print("Course % > 70 mean median stdev min max count")
-        print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count))
-        print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count))
-        print()
+        if final_max == 0: return
+
+        scaled_final_scores = [ x / final_max for x in final_scores]
+        (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores),2) for f in fxns]
+
+        good_code = ilearn_name_to_course_code(course_code)
+        pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0
+
+
+        #print("Course % > 70 mean median stdev min max count")
+        #print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count))
+        #print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count))
+        #print()
         #output.writerow( [sem2, crn, course_code, "current score", teacher, mode, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] )
-        output.writerow( [sem2, crn, course_code, "final score", teacher, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count] )
+        #output.writerow( [crs_code, pocr, tch_code, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] )
+        output.writerow( [crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] )
     except Exception as e:
         print("Exception:", e)
 
 def process_grades():
+    # first loop to get all names
+    courses_labeled = {}
+    teacher_to_code = {}
+    code_to_teacher = {}
+
+    course_to_code = {}
+    code_to_course = {}
+
+    index = 1001
+    crs_index = 4001
+
+    with open(all_grades_file, newline="") as csvfile:
+        csvreader = csv.reader(csvfile)
+        next(csvreader, None)
+        for row in csvreader:
+            # every distinct course name gets a code, so the later lookup cannot miss
+            if not row[2] in course_to_code:
+                course_to_code[row[2]] = crs_index
+                code_to_course[crs_index] = row[2]
+                crs_index += 1
+
+            # parse the teacher only once per (row[0], row[1]) pair
+            crn_sem = row[0] + '_' + row[1]
+            if not crn_sem in courses_labeled:
+                teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(row[2])
+                courses_labeled[crn_sem] = teacher
+
+                if teacher:
+                    if not teacher in teacher_to_code:
+                        teacher_to_code[teacher] = index
+                        code_to_teacher[index] = teacher
+                        index += 1
+    # close the lookup files deterministically instead of leaking the handles
+    with codecs.open('cache/teacher_lookup_codes.json','w','utf-8') as f:
+        f.write( json.dumps( [teacher_to_code, code_to_teacher], indent=2) )
+    with codecs.open('cache/course_lookup_codes.json','w','utf-8') as f:
+        f.write( json.dumps( [course_to_code, code_to_course], indent=2) )
+
     with open(all_courses_file, "w", newline="") as output_f:
         output = csv.writer(output_f)
-        output.writerow("sem crn shortname score_type teacher mode percent_passed mean median stdev min max count".split(" "))
+        output.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev scl_min scl_max count".split(" "))
 
         with open(all_grades_file, newline="") as csvfile:
             csvreader = csv.reader(csvfile)
@@ -230,14 +357,14 @@ def process_grades():
 
                 if index != current_index:
                     if block:
-                        process_one_course_grades(block, output)
+                        process_one_course_grades(block, output, teacher_to_code, course_to_code)
                     block = []
                     current_index = index
 
                 block.append(row)
 
             if block:
-                process_one_course_grades(block, output)
+                process_one_course_grades(block, output, teacher_to_code, course_to_code)
 
 
 
@@ -247,6 +374,7 @@ if __name__ == "__main__":
     options = { 1: ['get all historical grades from ilearn',get_all] ,
                 2: ['process grades csv file',process_grades] ,
                 3: ['test shortname parse',nametest] ,
+                4: ['test sem codes',codetest] ,
                 }
 
     print ('')