From b0243da53ea913802f7e2377072cc8603a9454be Mon Sep 17 00:00:00 2001 From: phowell Date: Thu, 11 May 2023 07:19:21 -0700 Subject: [PATCH] stats-make non summary data file --- stats.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/stats.py b/stats.py index a324b8e..d79ede0 100644 --- a/stats.py +++ b/stats.py @@ -51,6 +51,7 @@ from collections import defaultdict all_grades_file = f"cache/grades_all.csv" all_courses_file = f"cache/course_grades_all.csv" all_courses_file2 = f"cache/course_grades_compact.csv" +all_courses_file3 = f"cache/course_grades_full.csv" def sem_num_to_code(sem_num): p = re.search(r'^(\d\d\d\d)(\d\d)$', sem_num) @@ -235,6 +236,11 @@ def above_70(li,maximum): above = list(filter(lambda x: x >= cutoff, li)) return round(len(above)/len(li), 3) + + + + +# v1, does a row of averages for each course def process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code): fxns = [mean, median, stdev, min, max, len] c_id = block[0][0] @@ -264,18 +270,52 @@ def process_one_course_grades(block, output, out_c, teacher_to_code, course_to_c good_code = ilearn_name_to_course_code(course_code) pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0 - - #print("Course % > 70 mean median stdev min max count") - #print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count)) - #print("{:>12} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {: 6.0f} {:6d} ".format(course_code, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, final_count)) - #print() - #output.writerow( [sem2, crn, course_code, "current score", teacher, mode, cur_pct_passed, cur_mean, cur_median, cur_stdev, cur_min, cur_max, cur_count] ) - #output.writerow( [crs_code, pocr, tch_code, mode, final_pct_passed, final_mean, final_median, final_stdev, final_min, final_max, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] ) output.writerow( [crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, scl_min, scl_max, final_count] ) out_c.writerow([crs_code, good_code, pocr, tch_code, mode, final_pct_passed, scl_mean, scl_median, scl_stdev, final_count]) except Exception as e: print("Exception:", e) + + + +# v2, one line per student/course +def process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code): + fxns = [mean, median, stdev, min, max, len] + c_id = block[0][0] + sem = block[0][1] + course_code = block[0][2] + cur_scores = [num(x[6]) for x in block] + final_scores = [num(x[7]) for x in block] + teacher, mode, crn, sem2 = short_name_to_teacher_type_crn_sem(course_code) + if not teacher: + return + tch_code = teacher_to_code[teacher] + crs_code = course_to_code[course_code] + if len(final_scores) < 2: + return + try: + + # "course_code course pocr_status teacher_code mode student_id scaled_score" + + (final_mean, final_median, final_stdev, final_min, final_max, final_count) = [round(f(final_scores)) for f in fxns] + final_pct_passed = above_70(final_scores, final_max) + + if final_max == 0: return + + scaled_final_scores = [ x / final_max for x in final_scores] + (scl_mean, scl_median, scl_stdev, scl_min, scl_max, scl_count) = [round(f(scaled_final_scores),2) for f in fxns] + + good_code = ilearn_name_to_course_code(course_code) + pocr = 1 if lookup_pocr(teacher, good_code, sem2) else 0 + + for row in block: + student_id = row[3] + scaled_score = round(num(row[7]) / final_max, 2) + out_f.writerow([crs_code, good_code, pocr, tch_code, mode, student_id, scaled_score]) + print(course_code) + except Exception as e: + print("Exception:", e) + def process_grades(): # first loop to get all names courses_labeled = {} @@ -310,6 +350,10 @@ def process_grades(): codecs.open('cache/teacher_lookup_codes.json','w','utf-8').write( json.dumps( [teacher_to_code, code_to_teacher], indent=2) ) codecs.open('cache/course_lookup_codes.json','w','utf-8').write( json.dumps( [course_to_code, code_to_course], indent=2) ) + out_fullrows = codecs.open(all_courses_file3,'w','utf-8') + out_f = csv.writer(out_fullrows) + out_f.writerow("course_code course pocr_status teacher_code mode student_id scaled_score".split(" ")) + out_compact = codecs.open(all_courses_file2,'w','utf-8') out_c = csv.writer(out_compact) out_c.writerow("course_code course pocr_status teacher_code mode percent_passed scl_mean scl_median scl_stdev count".split(" ")) @@ -330,6 +374,7 @@ def process_grades(): if index != current_index: if block: process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code) + process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code) block = [] current_index = index @@ -337,6 +382,7 @@ def process_grades(): if block: process_one_course_grades(block, output, out_c, teacher_to_code, course_to_code) + process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)