From 9e485d7641ffeac13c4ac14207c75353cff6a4c1 Mon Sep 17 00:00:00 2001
From: Coding with Peter
Date: Mon, 25 Mar 2024 18:15:28 -0700
Subject: [PATCH] stats on historical course offerings

---
Review notes (not part of the commit message): enrollment_helper now closes
its JSON output file, sets a request timeout, no longer reverses
semesters.code in place, and returns early when nothing was fetched;
try_clustering caps n_clusters at the number of rows. The blob ids on the
"index" lines are those of the original commit.

 courses.py   | 119 +++++++++++++++++++++++++++++++++++++++++++++++++--
 semesters.py |   4 +-
 util.py      |   2 +-
 3 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/courses.py b/courses.py
index 9615c85..9c3c99d 100644
--- a/courses.py
+++ b/courses.py
@@ -2018,7 +2018,119 @@ def change_link_in_all_terms_pages():
 
     term = 181
     courses = getCoursesInTerm(term,get_fresh=1,show=0,active=1)
-    
+
+def enrollment_helper():
+    """Collect historical section data and write enrollment summaries to cache/.
+
+    Downloads the expanded schedule JSON for every semester key in
+    ``semesters.sems``, trims each section to the ``keep`` fields and adds its
+    ``to_sis_sem`` value, a fill percentage and its ``dept_from_name`` value.
+
+    Files written:
+      cache/section_history.json - sections nested by semester, dept, code
+      cache/section_history.csv - every section, flat and sorted
+      cache/section_counts_history.csv - sections per code per semester
+      cache/section_and_mode_counts_history.csv - the same split by type,
+          plus a KMeans cluster label for each code
+    """
+    ignore = ['JLE','JFT', 'CWE']
+    ignore2 = ['AH 190', 'AE 600', 'AE 602', 'AE 603','ACCT 190','AJ 100A', 'AJ 107A', 'AJ 213A','AJ 229A','AJ 231A','AMT 190','ATH 23','BUS 190','CD 190','COS 290','WTRM 290','SPAN 8A', 'SPAN 8B', 'SPAN 8C', 'SPAN 8D', 'RE 190','MKTG 190']
+    keep = 'code,name,days,cap,act,teacher,date,partofday,type,site'.split(',')
+    # fetch enrollment stats for last few years
+    from semesters import sems, to_sis_sem
+    from util import dept_from_name
+    raw = []
+    sort = {}
+    for s in sems:
+        try:
+            # timeout keeps one unresponsive request from hanging the whole run
+            sched1 = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json", timeout=30).json()
+            sort[s] = {}
+            for sect in sched1:
+                # NOTE(review): ignore2 looks like course codes but is compared to 'name' - confirm
+                if sect['name'] in ignore2:
+                    continue
+                sect_smaller = funcy.project(sect,keep)
+                d = dept_from_name(sect_smaller['code'])
+                if d in ignore:
+                    continue
+                sect_smaller['sem'] = to_sis_sem(s)
+                sect_smaller['dept'] = d
+                cap = int(sect_smaller['cap'])
+                act = int(sect_smaller['act'])
+                # zero capacity or zero enrollment is recorded as 100 (also avoids dividing by zero)
+                if cap == 0 or act == 0:
+                    sect_smaller['fill_pct'] = 100
+                else:
+                    sect_smaller['fill_pct'] = round((act / cap) * 100)
+                raw.append(sect_smaller)
+                sort[s].setdefault(d, {}).setdefault(sect['code'], []).append(sect_smaller)
+            print(f"{s} OK.")
+        except Exception as e:
+            # best effort: a semester that cannot be fetched or parsed is reported and skipped
+            print(f"{s} not found. {e}")
+
+    # open the output only once the data is ready, and always close the handle
+    with codecs.open('cache/section_history.json','w','utf-8') as oo:
+        oo.write(json.dumps(sort,indent=2))
+
+    if not raw:
+        # an empty DataFrame has none of the columns used below
+        print("No sections found.")
+        return
+
+    df = pd.DataFrame(raw)
+    df_sorted = df.sort_values(['dept', 'code', 'type','site','partofday','fill_pct'])
+    df_sorted.to_csv('cache/section_history.csv')
+
+    class_counts = df.groupby(['sem', 'code']).size().reset_index(name='class_count')
+    print("Class counts by semester")
+    print(class_counts)
+    pivot_df = class_counts.pivot_table(index='code', columns='sem', values='class_count', aggfunc='sum', fill_value=0)
+    # Reset the index to move 'code' back to a column
+    pivot_df.reset_index(inplace=True)
+    print(pivot_df)
+    pivot_df.to_csv('cache/section_counts_history.csv')
+
+    # Group by semester and class type, and then count the number of occurrences of each class type
+    class_type_counts = df.groupby(['sem', 'code', 'type']).size().reset_index(name='class_type_count')
+    print("Class type by semester")
+    print(class_type_counts)
+    pivot_df2 = class_type_counts.pivot_table(index='code', columns=['sem','type'], values='class_type_count', aggfunc='sum', fill_value=0)
+    # Reset the index to move 'code' back to a column
+    pivot_df2.reset_index(inplace=True)
+
+    kmeans = try_clustering(pivot_df2.copy())
+    pivot_df2.insert(0, "Cluster", kmeans.labels_)
+
+    print(pivot_df2)
+    pivot_df2.to_csv('cache/section_and_mode_counts_history.csv')
+
+    # Group by teacher
+    class_teacher_counts = df.groupby(['sem', 'code', 'teacher']).size().reset_index(name='class_teacher_count')
+    print("Class teacher by semester")
+    print(class_teacher_counts)
+
+    # TODO: group by COURSE (ie: ENGL1A)
+    # For each historical WINTER, SPRING, SUMMER, FALL:
+    #   number of sections offered, by mode, time of day, campus
+    #   all teachers who taught it (and their qual to teach online)
+    #   fill percentage for each section, then by mode, tod, campus
+
+
+def try_clustering(df, n_clusters=4):
+    """Fit KMeans on the count columns of df and return the fitted model.
+
+    df must have a 'code' column, which is dropped before fitting; every
+    remaining column is used as a feature. n_clusters is capped at the
+    number of rows because KMeans rejects more clusters than samples.
+    """
+    from sklearn.cluster import KMeans
+
+    features = df.drop(['code'], axis=1)
+    k = min(n_clusters, len(features))
+    return KMeans(n_clusters=k, random_state=0).fit(features)
+
 if __name__ == "__main__":
     options = { 
         1: ['Cross check schedule with ztc responses',make_ztc_list] ,
@@ -2063,13 +2175,14 @@ if __name__ == "__main__":
         44: ['List users who passed GOTT 1 / Bootcamp', get_gott1_passers],
         45: ['List users who passed Plagiarism Module', get_plague_passers],
         46: ['make courses visible to auth users', modify_courses],
+        47: ['enrollment helper', enrollment_helper],
         # 24: ['Add course evals to whole semester',instructor_list_to_activate_evals],
         # 21: ['Add announcements to homepage', change_course_ann_homepage],
 
         # TODO wanted: group shell for each GP (guided pathway) as a basic student services gateway....
         #
-        45: ['Fetch rubric scores and comments', fetch_rubric_scores],
-        46: ['Fetch announcements in a course', fetch_announcements],
+        50: ['Fetch rubric scores and comments', fetch_rubric_scores],
+        51: ['Fetch announcements in a course', fetch_announcements],
         }
 
     print ('')
diff --git a/semesters.py b/semesters.py
index 39431b6..bc4f196 100644
--- a/semesters.py
+++ b/semesters.py
@@ -1,6 +1,6 @@
 # Try to gather all the different formats and ways of labeling a semester, along with their associated dates.
 
-import json
+import json, funcy
 
 sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
 
@@ -51,6 +51,8 @@ def to_sis_sem(s):
 
 
 # print(json.dumps(semester_list,indent=2))
+sems = funcy.project(semester_list, code)
+#print(json.dumps(sems,indent=2))
 
 """
 
diff --git a/util.py b/util.py
index f8afd9c..674cef4 100644
--- a/util.py
+++ b/util.py
@@ -6,7 +6,7 @@ import re, csv
 from collections import defaultdict
 from bs4 import BeautifulSoup as bs
 import pytz, datetime, dateutil, json
-from time import timedelta
+from datetime import timedelta
 from dateutil import tz
 
 