diff --git a/stats.py b/stats.py
index 97e500b..a977abc 100644
--- a/stats.py
+++ b/stats.py
@@ -37,6 +37,18 @@
 - Next steps: generate the x-reference for what categories teachers are in, and
   integrate into the main data file.
 
+
+- Next steps (June/July 2023)
+    - add campus, time of day, and sem_order (which semester of the student's college career the course was taken in) columns
+    - Organize rows by students
+    - Develop a way to categorize them: by course set and/or score set (clustering: k-means, forest, etc.)
+
+    - Goals
+        - display and summarize clusters of students on a dashboard
+        - ongoing categorization (implying course recommendations and interventions) based on those clusters
+        - 
+
+
 ## Hypothesis Testing
 
 - 
@@ -54,6 +66,9 @@ all_grades_file = f"cache/grades_all.csv"
 all_courses_file = f"cache/course_grades_all.csv"
 all_courses_file2 = f"cache/course_grades_compact.csv"
 all_courses_file3 = f"cache/course_grades_full.csv"
+all_courses_file4 = "cache/course_grades_full_bystudent.csv"  # full grade rows regrouped by student (written by reorganize_grades_student)
+all_courses_file5 = "cache/courses_passed_bystudent.csv"  # one "student,<space-separated courses>" line per student
+student_courses_scores = "cache/courses_student_scores.csv"  # one "student,course|score,..." line per student
 student_orientation_participation = f'cache/participation_orientation_courses.json'
 
 
@@ -454,15 +469,48 @@ def process_grades():
                 process_one_course_grades_full(block, out_f, teacher_to_code, course_to_code)
 
 
-    
+def reorganize_grades_student():
+    """Regroup the full course-grades CSV by student and write three files.
+
+    Reads all_courses_file3, skips its header row, and groups the rows by
+    row[6] (the student_id column of the header this function writes).
+    Students are emitted in sorted order into:
+      - all_courses_file4: the same rows, regrouped, under a fixed header
+      - all_courses_file5: one "student,course course ..." line per student
+      - student_courses_scores: one "student,course|score,..." line each
+    """
+    bystudent = defaultdict(list)
+    with open(all_courses_file3, newline="") as csvfile:
+        csvreader = csv.reader(csvfile)
+        # Default of None: an empty input yields header-only outputs
+        # instead of an uncaught StopIteration.
+        next(csvreader, None)
+        for row in csvreader:
+            bystudent[row[6]].append(row)
+
+    header = "course_code course pocr_status orientation_status teacher_code mode student_id scaled_score".split(" ")
+    with open(all_courses_file4, "w", newline="") as output_f, \
+            open(all_courses_file5, "w", newline="") as output_s, \
+            open(student_courses_scores,'w') as output_scs:
+        output_s.write("student,courses\n")
+        output = csv.writer(output_f)
+        output.writerow(header)
+        for st in sorted(bystudent):
+            rows = bystudent[st]
+            # Every course|score pair, including the last, ends with a comma.
+            pairs = "".join(f"{r[1]}|{r[7]}," for r in rows)
+            output_scs.write(f"{st},{pairs}\n")
+            output_s.write(st + "," + " ".join(r[1] for r in rows) + "\n")
+            output.writerows(rows)
 
 
 if __name__ == "__main__":
     options = { 1: ['get all historical grades from ilearn',get_all] ,  
                 2: ['process grades csv file',process_grades] , 
-                3: ['test shortname parse',nametest] , 
-                4: ['test sem codes',codetest] , 
-                5: ['get student data from orientations', get_student_orientations],
+                3: ['reorganize full grades file by student', reorganize_grades_student],
+                4: ['test shortname parse',nametest] , 
+                5: ['test sem codes',codetest] , 
+                6: ['get student data from orientations', get_student_orientations],
               }
     print ('')