stats on historical course offerings

This commit is contained in:
Coding with Peter 2024-03-25 18:15:28 -07:00
parent 498b90ddcc
commit 9e485d7641
3 changed files with 120 additions and 5 deletions

View File

@ -2019,6 +2019,118 @@ def change_link_in_all_terms_pages():
courses = getCoursesInTerm(term,get_fresh=1,show=0,active=1) courses = getCoursesInTerm(term,get_fresh=1,show=0,active=1)
def enrollment_helper():
    """Build historical section/enrollment statistics and cache them to disk.

    For every semester key in ``semesters.sems`` the expanded schedule JSON is
    downloaded from gavilan.cc. Each section is trimmed to the fields listed in
    ``keep``, tagged with its SIS semester, its department and a fill
    percentage, and collected both as a flat list and as a nested
    semester -> department -> course code mapping.

    Files written (all under ``cache/``):
        section_history.json                 nested mapping of trimmed sections
        section_history.csv                  flat table of sections, sorted
        section_counts_history.csv           section count per code per semester
        section_and_mode_counts_history.csv  same, split by ``type``, with a
                                             cluster label from try_clustering

    A semester whose schedule cannot be fetched or processed is reported on
    stdout and skipped; the remaining semesters are still processed.
    """
    # fetch enrollment stats for last few years
    from semesters import code, sems, to_sis_sem
    from util import dept_from_name

    ignore = ['JLE','JFT', 'CWE']
    ignore2 = ['AH 190', 'AE 600', 'AE 602', 'AE 603','ACCT 190','AJ 100A', 'AJ 107A', 'AJ 213A','AJ 229A','AJ 231A','AMT 190','ATH 23','BUS 190','CD 190','COS 290','WTRM 290','SPAN 8A', 'SPAN 8B', 'SPAN 8C', 'SPAN 8D', 'RE 190','MKTG 190']
    keep = 'code,name,days,cap,act,teacher,date,partofday,type,site'.split(',')

    raw = []
    # NOTE(review): this reverses the list owned by the semesters module in
    # place, so every call flips it again. Kept for compatibility; confirm
    # whether anything still relies on it.
    code.reverse()

    sort = defaultdict(dict)
    for s in sems:
        try:
            # A timeout keeps one unresponsive request from hanging the run.
            sched1 = requests.get(
                f"http://gavilan.cc/schedule/{s}_sched_expanded.json",
                timeout=60,
            ).json()
            sort[s] = defaultdict(dict)
            for sect in sched1:
                # NOTE(review): ignore2 entries look like course codes but are
                # compared against sect['name'] - confirm the schedule schema.
                if sect['name'] in ignore2:
                    continue
                sect_smaller = funcy.project(sect, keep)
                sect_smaller['sem'] = to_sis_sem(s)

                cap = int(sect_smaller['cap'])
                act = int(sect_smaller['act'])
                # NOTE(review): sections with zero capacity OR zero enrollment
                # are recorded as 100% full; only cap == 0 is needed to avoid
                # dividing by zero. Behavior kept as-is - confirm intent.
                if cap == 0 or act == 0:
                    sect_smaller['fill_pct'] = 100
                else:
                    sect_smaller['fill_pct'] = round((act / cap) * 100)

                d = dept_from_name(sect_smaller['code'])
                if d in ignore:
                    continue
                sect_smaller['dept'] = d
                raw.append(sect_smaller)

                if d not in sort[s]:
                    sort[s][d] = defaultdict(dict)
                name = sect['code']
                if name not in sort[s][d]:
                    sort[s][d][name] = []
                sort[s][d][name].append(sect_smaller)
            print(f"{s} OK.")
        except Exception as e:
            # Best effort: report the semester and move on to the next one.
            print(f"{s} not found. {e}")
            #sems.pop(s)

    # Open the output only once the data is ready, and close it deterministically.
    with codecs.open('cache/section_history.json','w','utf-8') as oo:
        oo.write(json.dumps(sort,indent=2))

    if not raw:
        # Without rows the frame has no columns and the sorts/group-bys below
        # would fail with a KeyError.
        print("No section data collected; skipping CSV reports.")
        return

    df = pd.DataFrame(raw)
    df_sorted = df.sort_values(['dept', 'code', 'type','site','partofday','fill_pct'])
    df_sorted.to_csv('cache/section_history.csv')

    # Number of sections of each course code, per semester
    class_counts = df.groupby(['sem', 'code']).size().reset_index(name='class_count')
    print("Class counts by semester")
    print(class_counts)

    pivot_df = class_counts.pivot_table(index='code', columns='sem', values='class_count', aggfunc='sum', fill_value=0)
    # Reset the index to move 'code' back to a column
    pivot_df.reset_index(inplace=True)
    print(pivot_df)
    pivot_df.to_csv('cache/section_counts_history.csv')

    # Group by semester and class type, and then count the number of occurrences of each class type
    class_type_counts = df.groupby(['sem', 'code', 'type']).size().reset_index(name='class_type_count')
    print("Class type by semester")
    print(class_type_counts)

    pivot_df2 = class_type_counts.pivot_table(index='code', columns=['sem','type'], values='class_type_count', aggfunc='sum', fill_value=0)
    # Reset the index to move 'code' back to a column
    pivot_df2.reset_index(inplace=True)

    # Cluster on a copy, then attach the per-row label as the first column.
    kmeans = try_clustering(pivot_df2.copy())
    pivot_df2.insert(0, "Cluster", kmeans.labels_)
    print(pivot_df2)
    pivot_df2.to_csv('cache/section_and_mode_counts_history.csv')

    # Group by teacher
    class_teacher_counts = df.groupby(['sem', 'code', 'teacher']).size().reset_index(name='class_teacher_count')
    print("Class teacher by semester")
    print(class_teacher_counts)

    # TODO: group by COURSE (ie: ENGL1A)
    # For each historical WINTER, SPRING, SUMMER, FALL:
    #   number of sections offered, by mode, time of day, campus
    #   all teachers who taught it (and their qual to teach online)
    #   fill percentage for each section, then by mode, tod, campus
def try_clustering(df, n_clusters=4):
    """Fit a KMeans model on every column of *df* except 'code'.

    Args:
        df: DataFrame holding a 'code' column plus the feature columns to
            cluster on. The frame passed in is not modified; the 'code'
            column is dropped on a new frame.
        n_clusters: number of clusters to fit. Defaults to 4, the value that
            was previously hard-coded.

    Returns:
        The fitted sklearn ``KMeans`` estimator. Callers read ``labels_`` from
        it to get the cluster assigned to each row.
    """
    # Imported here so sklearn is only needed when clustering is requested.
    from sklearn.cluster import KMeans

    # 'code' identifies the row and is not a feature, so leave it out.
    features = df.drop(['code'], axis=1)

    # random_state is fixed so repeated runs produce the same clusters.
    return KMeans(n_clusters=n_clusters, random_state=0).fit(features)
if __name__ == "__main__": if __name__ == "__main__":
options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] , options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
@ -2063,13 +2175,14 @@ if __name__ == "__main__":
44: ['List users who passed GOTT 1 / Bootcamp', get_gott1_passers], 44: ['List users who passed GOTT 1 / Bootcamp', get_gott1_passers],
45: ['List users who passed Plagiarism Module', get_plague_passers], 45: ['List users who passed Plagiarism Module', get_plague_passers],
46: ['make courses visible to auth users', modify_courses], 46: ['make courses visible to auth users', modify_courses],
47: ['enrollment helper', enrollment_helper],
# 24: ['Add course evals to whole semester',instructor_list_to_activate_evals], # 24: ['Add course evals to whole semester',instructor_list_to_activate_evals],
# 21: ['Add announcements to homepage', change_course_ann_homepage], # 21: ['Add announcements to homepage', change_course_ann_homepage],
# TODO wanted: group shell for each GP (guided pathway) as a basic student services gateway.... # TODO wanted: group shell for each GP (guided pathway) as a basic student services gateway....
# #
45: ['Fetch rubric scores and comments', fetch_rubric_scores], 50: ['Fetch rubric scores and comments', fetch_rubric_scores],
46: ['Fetch announcements in a course', fetch_announcements], 51: ['Fetch announcements in a course', fetch_announcements],
} }
print ('') print ('')

View File

@ -1,6 +1,6 @@
# Try to gather all the different formats and ways of labeling a semester, along with their associated dates. # Try to gather all the different formats and ways of labeling a semester, along with their associated dates.
import json import json, funcy
sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' } sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
@ -51,6 +51,8 @@ def to_sis_sem(s):
# print(json.dumps(semester_list,indent=2)) # print(json.dumps(semester_list,indent=2))
sems = funcy.project(semester_list, code)
#print(json.dumps(sems,indent=2))
""" """

View File

@ -6,7 +6,7 @@ import re, csv
from collections import defaultdict from collections import defaultdict
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
import pytz, datetime, dateutil, json import pytz, datetime, dateutil, json
from time import timedelta from datetime import timedelta
from dateutil import tz from dateutil import tz