# Cache file locations used by the grade-processing steps.
all_grades_file = "cache/grades_all.csv"
all_courses_file = "cache/course_grades_all.csv"
all_courses_file2 = "cache/course_grades_compact.csv"
all_courses_file3 = "cache/course_grades_full.csv"
all_courses_file4 = "cache/course_grades_full_bystudent.csv"
all_courses_file5 = "cache/courses_passed_bystudent.csv"
student_courses_scores = "cache/courses_student_scores.csv"
student_orientation_participation = 'cache/participation_orientation_courses.json'

# Header written at the top of the by-student grades file.  The column
# indexes below follow this layout; the input file is presumed to share it
# (its rows are copied under this header unchanged).
_FULL_GRADES_HEADER = (
    "course_code course pocr_status orientation_status "
    "teacher_code mode student_id scaled_score"
).split(" ")
_COURSE_COL = 1
_STUDENT_COL = 6
_SCORE_COL = 7


def reorganize_grades_student(source=None, bystudent_out=None,
                              courses_out=None, scores_out=None):
    """Regroup the full grades CSV so that each student's rows are together.

    Reads ``source`` (skipping its first line as a header), groups the rows
    by the student id column, and writes three files with students in sorted
    id order:

    * ``bystudent_out`` -- CSV with a fixed header followed by every input
      row, grouped by student (rows keep their input order within a student).
    * ``courses_out`` -- a ``student,courses`` header, then one line per
      student: ``<student>,<course> <course> ...``.  Every course in the
      student's rows is listed; no filtering by score happens here.
    * ``scores_out`` -- one line per student, no header:
      ``<student>,<course>|<score>,<course>|<score>,``.  The trailing comma
      is kept so the file format stays as it was.

    Args:
        source: Path of the full grades CSV.  Defaults to
            ``all_courses_file3``.
        bystudent_out: Path of the grouped CSV.  Defaults to
            ``all_courses_file4``.
        courses_out: Path of the per-student course list.  Defaults to
            ``all_courses_file5``.
        scores_out: Path of the per-student course/score list.  Defaults to
            ``student_courses_scores``.

    Raises:
        OSError: If ``source`` cannot be read or an output cannot be written.
    """
    # Resolve defaults at call time so a bare call uses the module constants.
    if source is None:
        source = all_courses_file3
    if bystudent_out is None:
        bystudent_out = all_courses_file4
    if courses_out is None:
        courses_out = all_courses_file5
    if scores_out is None:
        scores_out = student_courses_scores

    # Phase 1: read and group.  The input is closed before any output is
    # opened.
    rows_by_student = defaultdict(list)
    with open(source, newline="") as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; rows too short to carry
            # a score cannot be indexed below, so they are skipped.
            if len(row) <= _SCORE_COL:
                continue
            rows_by_student[row[_STUDENT_COL]].append(row)

    # Phase 2: write the three views.  The first two files use newline=""
    # so line endings are written exactly as given; the scores file is
    # opened in plain text mode, as before.
    with open(bystudent_out, "w", newline="") as output_f, \
            open(courses_out, "w", newline="") as output_s, \
            open(scores_out, "w") as output_scs:
        output_s.write("student,courses\n")
        grouped_writer = csv.writer(output_f)
        grouped_writer.writerow(_FULL_GRADES_HEADER)

        for student in sorted(rows_by_student):
            student_rows = rows_by_student[student]
            courses = [r[_COURSE_COL] for r in student_rows]

            pairs = "".join(
                f"{r[_COURSE_COL]}|{r[_SCORE_COL]}," for r in student_rows
            )
            output_scs.write(student + "," + pairs + "\n")
            output_s.write(student + "," + " ".join(courses) + "\n")
            grouped_writer.writerows(student_rows)