produce student history

2023-06-01 09:03:27 -07:00 · 2023-06-01 09:03:27 -07:00 · d2b94f1c92
parent 6adaa6a1c4
commit d2b94f1c92
1 changed files with 52 additions and 4 deletions
--- a/stats.py
+++ b/stats.py
@ -37,6 +37,18 @@
 - Next steps: generate the x-reference for what categories teachers are in, and
  integrate into the main data file.
 - Next steps (June/July 2023)
    - add campus, time of day, and sem_order (which semester in their college career did they take it) columns
    - Organize rows by students
    + Develop a way to categorize them: by course set and/or score set (clustering: kmeans, forest, etc.)
    - Goals
        - display and summarize clusters of students on a dashboard
        - ongoing categorization (implying course recommendations and interventions) based on it
        - 
 ## Hypothesis Testing
 - 
@ -54,6 +66,9 @@ all_grades_file = f"cache/grades_all.csv"
 # Paths of cached grade exports (relative to the working directory).
 all_courses_file = "cache/course_grades_all.csv"
 all_courses_file2 = "cache/course_grades_compact.csv"
 # Input of reorganize_grades_student(): the full per-enrollment grades CSV.
 all_courses_file3 = "cache/course_grades_full.csv"
 # Outputs of reorganize_grades_student():
 #   file4 - the full rows regrouped so each student's rows are contiguous
 #   file5 - one "student,courses" line per student
 #   student_courses_scores - one line per student of "course|score," pairs
 all_courses_file4 = "cache/course_grades_full_bystudent.csv"
 all_courses_file5 = "cache/courses_passed_bystudent.csv"
 student_courses_scores = "cache/courses_student_scores.csv"
 student_orientation_participation = "cache/participation_orientation_courses.json"
@ -454,15 +469,48 @@ def process_grades():
                process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
 def reorganize_grades_student():
    """Regroup the full course-grades CSV by student and write three views.

    Reads ``all_courses_file3`` (its first row is skipped as a header) and
    groups the remaining rows by the student id found in column 6.  Students
    are emitted in sorted id order; each student's rows keep their input
    order.  Three files are written:

    - ``all_courses_file4``: the same rows as CSV, grouped by student, under
      a fixed eight-column header row.
    - ``all_courses_file5``: a ``student,courses`` header followed by one
      line per student with the course names (column 1) joined by spaces.
    - ``student_courses_scores``: one line per student, the id followed by
      ``course|score,`` pairs (score taken from column 7).  The trailing
      comma on each line is kept as-is.

    An empty input file produces header-only outputs instead of raising
    ``StopIteration``, and blank lines in the input are skipped instead of
    raising ``IndexError``.  Rows that are non-empty but too short still
    raise ``IndexError`` so malformed data is not silently dropped.
    """
    # Column positions within a row of the full grades file.
    course_col, student_col, score_col = 1, 6, 7
    header = [
        "course_code", "course", "pocr_status", "orientation_status",
        "teacher_code", "mode", "student_id", "scaled_score",
    ]

    # Read everything first so the input is closed before outputs are opened.
    bystudent = defaultdict(list)
    with open(all_courses_file3, newline="") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header; tolerate an empty file
        for row in csvreader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            bystudent[row[student_col]].append(row)

    with open(all_courses_file4, "w", newline="") as output_f, \
            open(all_courses_file5, "w", newline="") as output_s, \
            open(student_courses_scores, "w") as output_scs:
        output_s.write("student,courses\n")
        output = csv.writer(output_f)
        output.writerow(header)
        for st in sorted(bystudent):
            rows = bystudent[st]
            courses = [r[course_col] for r in rows]
            pairs = "".join(f"{r[course_col]}|{r[score_col]}," for r in rows)
            output_scs.write(st + "," + pairs + "\n")
            output_s.write(st + "," + " ".join(courses) + "\n")
            output.writerows(rows)
 if __name__ == "__main__":
    options = { 1: ['get all historical grades from ilearn',get_all] ,  
                2: ['process grades csv file',process_grades] , 
-                3: ['test shortname parse',nametest] , 
+                3: ['reorganize full grades file by student', reorganize_grades_student],
-                4: ['test sem codes',codetest] , 
+                4: ['test shortname parse',nametest] , 
-                5: ['get student data from orientations', get_student_orientations],
+                5: ['test sem codes',codetest] , 
                6: ['get student data from orientations', get_student_orientations],
              }
    print ('')