diff --git a/stats.py b/stats.py index 2d7f191..5be4c35 100644 --- a/stats.py +++ b/stats.py @@ -25,6 +25,8 @@ - CRNs to exclude, for example SP20, because of covid. Possibly SU20 and FA20 - CRNs in which teacher has done more than the minimum training in online teaching +- Next steps: generate the x-reference for what categories teachers are in, and + integrate into the main data file. ## Hypothesis Testing @@ -144,7 +146,12 @@ def short_name_to_crn(name): #print("Exception: ", e, name) return to_crn_fallback(name) -def short_name_to_teacher(name): + +def fixname(n): + return re.sub(r'\s+',' ', n).strip() + + +def short_name_to_teacher_type_crn_sem(name): load_schedules() crn, sem = short_name_to_crn(name) @@ -155,11 +162,11 @@ def short_name_to_teacher(name): sem = 'sp' + sem[2:] for course in schedules[sem]: if course['crn'] == crn: - return course['teacher'], course['type'] + return fixname(course['teacher']), course['type'], crn, sem except Exception as e: - return None + return None, None, None, None - return None + return None, None, None, None @@ -169,13 +176,13 @@ def nametest(): next(csvreader) for row in csvreader: - print(row[0], "-", short_name_to_teacher(row[0])) + print(row[0], "-", short_name_to_teacher_type_crn_sem(row[0])) next(csvreader) - - -def count_above_70(li): - pass +def above_70(li,maximum): + cutoff = 0.7 * maximum + above = list(filter(lambda x: x >= cutoff, li)) + return (len(above)/len(li)) def process_one_course_grades(block, output): fxns = [mean, median, stdev, min, max, len] @@ -184,25 +191,32 @@ def process_one_course_grades(block, output): course_code = block[0][2] cur_scores = [num(x[6]) for x in block] final_scores = [num(x[7]) for x in block] + print(course_code) + teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code) + if not teacher: + return #print(cur_scores) #print(final_scores) try: (cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count) = [round(f(cur_scores)) for f in fxns] (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns] - print("Course mean median stdev min max count") - print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count)) - print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_mean, final_median, final_stdev, final_min, final_max, final_count)) + cur_pct_passed = above_70(cur_scores, cur_max) + final_pct_passed = above_70(final_scores, final_max) + + print("Course % > 70 mean median stdev min max count") + print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count)) + print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count)) print() - output.writerow( [course_code, "current score", cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] ) - output.writerow( [course_code, "final score", final_mean, final_median, final_stdev, final_min, final_max, final_count] ) + #output.writerow( [sem2, crn, course_code, "current score", teacher, mode, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] ) + output.writerow( [sem2, crn, course_code, "final score", teacher, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count] ) except Exception as e: print("Exception:", e) def process_grades(): with open(all_courses_file, "w", newline="") as output_f: output = csv.writer(output_f) - output.writerow("Course mean median stdev min max count".split(" ")) + output.writerow("sem crn shortname score_type teacher mode percent_passed mean median stdev min max count".split(" ")) with open(all_grades_file, newline="") as csvfile: csvreader = csv.reader(csvfile)