diff --git a/localcache2.py b/localcache2.py index 0d04d76..0507821 100644 --- a/localcache2.py +++ b/localcache2.py @@ -230,6 +230,119 @@ ORDER BY u.sortable_name;""" + +def create_schedule_table_if_not_exists(): + conn,cur = db() + + # Check if table exists + cur.execute("SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'schedule' and table_schema = 'canvas')") + table_exists = cur.fetchone()[0] + + if not table_exists: + # Create table if it doesn't exist + cur.execute("""CREATE TABLE canvas.schedule ( + id SERIAL PRIMARY KEY, + canvascourse INTEGER REFERENCES canvas.courses, + crn VARCHAR(20), + code VARCHAR(30), + units VARCHAR(20), + teacher TEXT, + start VARCHAR(30), + "end" VARCHAR(30), + type VARCHAR(20), + loc VARCHAR(80), + site VARCHAR(50), + partofday VARCHAR(40), + cap INTEGER, + act INTEGER, + sem VARCHAR(10) + ) + """) + conn.commit() + + cur.close() + + +# Populate schedule table and correlate to courses table +def courses_to_sched(): + + EXECUTE = 1 + + seasons = {'10':'wi','30':'sp','50':'su','70':'fa'} + seasons2 = {'wi':'10', 'sp':'30', 'su':'50', 'fa':'70'} + + create_schedule_table_if_not_exists() + conn,cur = db() + + # get all ilearn courses + query = """SELECT c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name + FROM canvas.courses c + JOIN canvas.enrollment_terms t ON c.enrollment_term_id = t.id + ORDER BY c.sis_source_id, c.course_code;""" + + cur.execute(query) + + sis_to_sched = {} + for row in cur.fetchall(): + sis_source_id = row[2] # c.sis_source_id + sis_to_sched.setdefault(sis_source_id, []).append(row) + + + vals_cache = [] + i = 0 + for year in ['16','17','18','19','20','21','22','23','24']: + for sem in ['sp','su','fa']: + term = f"{sem}{year}" + sis_code = f"20{year}{seasons2[sem]}" + print(term) + sched = 0 + try: + sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json() + except Exception as e: + print(e) + + query = "INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,\"end\", type, loc, site, partofday, cap, act, sem) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" + + if sched: + for c in sched: + try: + pod = '' + if 'partofday' in c: pod = c['partofday'] + + full_sis_code = sis_code+'-'+c['crn'] + + if full_sis_code in sis_to_sched: + q = [sis_to_sched[full_sis_code][0][0], c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, int(c['cap']), int(c['act']), sis_code] + vals_cache.append( q ) # [ str(x) for x in q ] ) + print(f"{i}: {q}") + i += 1 + if i % 100 == 0: + if EXECUTE: + cur.executemany(query, vals_cache) + conn.commit() + vals_cache = [] + t = time.process_time() + delta = t - last_time + last_time = t + print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True) + else: + print(f"{full_sis_code} not in canvas courses.") + except Exception as e: + print(e) + + if EXECUTE: + cur.executemany(query, vals_cache) + conn.commit() + cur.close() + conn.close() + + + + + + + + if __name__ == "__main__": print ('') @@ -239,6 +352,7 @@ if __name__ == "__main__": 3: ['pages in term', pages_in_term], 4: ['new students this semester', users_new_this_semester], 5: ['all semester courses + teachers', all_sem_courses_teachers], + 6: ['Populate schedule table and correlate to courses table', courses_to_sched], } diff --git a/stats.py b/stats.py index b7bbd4e..3486fea 100644 --- a/stats.py +++ b/stats.py @@ -1201,12 +1201,12 @@ def visualize_course_modes_multi_semester(): import plotly.express as px from plotly.subplots import make_subplots seasons = {'sp':'30','su':'50','fa':'70'} - semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24".split(" ") - # sems = {'sp23':'202330','su23':'202350','fa23':'202370'} + semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24 su24 fa24".split(" ") sems = { x:'20' + x[2:] + seasons[x[:2]] for x in semcodes } sem_dfs = [] sem_dfs_depts = [] for s in sems.keys(): + print(f"fetching {s}") sched = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json").json() for crs in sched: if 'extra' in crs: del crs['extra'] @@ -1215,11 +1215,10 @@ def visualize_course_modes_multi_semester(): df_depts = df.copy() df_depts = df_depts.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy']) df = df.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy']) - dept_counts = grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count') + grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count') grouped_by_mode = df['type'].value_counts().reset_index() grouped_by_dept["semester"] = sems[s] grouped_by_mode["semester"] = sems[s] - #print(dept_counts) sem_dfs.append(grouped_by_mode) sem_dfs_depts.append(grouped_by_dept) @@ -1233,16 +1232,20 @@ def visualize_course_modes_multi_semester(): combined_data = pd.concat(sem_dfs, axis=0) - combined_data = combined_data.rename(columns={'type':'count','index':'type'}) + #print(combined_data) + #combined_data = combined_data.rename(columns={'type':'count','index':'type'}) + #print(combined_data) combined_data.reset_index(drop=True,inplace=True) + #print(combined_data) pivoted_data = combined_data.pivot(index='semester', columns='type', values='count') pivoted_data.reset_index(inplace=True) fig = px.bar(pivoted_data, x='semester',y=['hybrid', 'in-person', 'online', 'online live'], barmode='stack', title='Course Delivery by Semester', color_discrete_sequence=["#000066","#660000","#333366","#9400D3"]) + fig.update_layout(height=200*len(fig['data'])) fig.write_html(f"cache/sections_by_deliverymode.html") - + print(f"wrote to: cache/sections_by_deliverymode.html") combined_data_depts = pd.concat(sem_dfs_depts, axis=0) combined_data_depts.reset_index(drop=True,inplace=True) @@ -1260,19 +1263,28 @@ def visualize_course_modes_multi_semester(): fig = make_subplots(rows=len(unique_depts), cols=1, subplot_titles=unique_depts, ) + + print("\n\nindividual departments\n\n") for i, dept in enumerate(unique_depts, start=1): #if i>1: break # Filter the dataframe for the current department + print(f"{dept}") dept_data = combined_data_depts[combined_data_depts['dept'] == dept] # Pivot the data frame pivoted_dept_data = dept_data.pivot(index='semester', columns='type', values='count').reset_index() pivoted_dept_data.fillna(0,inplace=True) - print(pivoted_dept_data) + #print(pivoted_dept_data) # Plot the data columns_to_plot = ['hybrid', 'in-person', 'online', 'online live'] valid_columns = [col for col in columns_to_plot if col in pivoted_dept_data.columns] + + # to avoid futurewarning +# print(f" {valid_columns}") +# if len(valid_columns)==1: valid_columns = valid_columns[0] +# print(f" {valid_columns}") + fig_sub = px.bar(pivoted_dept_data, x='semester', y=valid_columns, barmode='stack', #title=f'Course Delivery by Semester for {dept}', color_discrete_sequence=["#000066","#660000","#333366","#9400D3"]) diff --git a/useful queries.sql b/useful queries.sql new file mode 100644 index 0000000..b15c516 --- /dev/null +++ b/useful queries.sql @@ -0,0 +1,95 @@ +select enrollment_term_id ,count(enrollment_term_id) as count from canvas.courses group by enrollment_term_id order by enrollment_term_id desc; + + +select id, name, sis_source_id from canvas.enrollment_terms order by sis_source_id ; + + +select c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name from canvas.courses c +join canvas.enrollment_terms t on c.enrollment_term_id=t.id +order by c.sis_source_id, c.course_code; + + +SELECT * FROM information_schema.tables; + + +INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,"end", type, loc, site, partofday, cap, act, sem) +VALUES (9651, '40724', 'SPAN 1A', '5.000', 'David G Perez', '1-27', '5-22', 'in-person', 'HU 105', 'Gilroy', 'Midday', 30, 26, '202030'); + + + + +-- +-- +-- users in sp24 and number of each type of course taking +-- +-- + +-- one student +select u.sortable_name, c.course_code, s.units, s.type, s.start +from canvas.users u +join canvas.enrollments e on u.id=e.user_id +join canvas.courses c on e.course_id=c.id +join canvas.schedule s on c.id=s.canvascourse +where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' -- and u.sortable_name='Zendejas, Grace' +order by u.sortable_name, s.start, c.course_code ; + + + +-- num courses +select u.sortable_name, + sum(CASE WHEN s.type = 'in-person' THEN 1 ELSE 0 end) AS inperson, + sum(CASE WHEN s.type = 'hybrid' THEN 1 ELSE 0 end) AS hybrid, + sum(CASE WHEN s.type = 'online' THEN 1 ELSE 0 end) AS online, + sum(CASE WHEN s.type = 'online live' THEN 1 ELSE 0 end) AS onlinelive, + count(*) as total +from canvas.users u +join canvas.enrollments e on u.id=e.user_id +join canvas.courses c on e.course_id=c.id +join canvas.schedule s on c.id=s.canvascourse +where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' +group by u.sortable_name +order by total desc, online desc, onlinelive desc, hybrid desc; + + + + +-- num units +select u.sortable_name, p.sis_user_id, + sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson, + sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid, + sum(CASE WHEN s.type = 'online' THEN s.units::FLOAT ELSE 0 end) AS online, + sum(CASE WHEN s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS onlinelive, + sum(s.units::FLOAT) as total +from canvas.users u +join canvas.enrollments e on u.id=e.user_id +join canvas.courses c on e.course_id=c.id +join canvas.schedule s on c.id=s.canvascourse +join canvas.pseudonyms p on u.id=p.user_id +where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' +group by u.sortable_name, p.sis_user_id +order by total desc, online desc, onlinelive desc, hybrid desc; + +-- only units +select + sum(s.units::FLOAT) as total +from canvas.users u +join canvas.enrollments e on u.id=e.user_id +join canvas.courses c on e.course_id=c.id +join canvas.schedule s on c.id=s.canvascourse +join canvas.pseudonyms p on u.id=p.user_id +where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' +group by u.sortable_name +order by total desc; + + + + +-- each class + +select u.sortable_name, c.course_code, s.type, s.units::FLOAT +from canvas.users u +join canvas.enrollments e on u.id=e.user_id +join canvas.courses c on e.course_id=c.id +join canvas.schedule s on c.id=s.canvascourse +where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' +order by u.sortable_name, c.course_code; \ No newline at end of file