local db and stats by semester

This commit is contained in:
Coding with Peter 2024-04-22 11:27:39 -07:00
parent 1c3f9dbf10
commit ef86e22114
3 changed files with 228 additions and 7 deletions

View File

@ -230,6 +230,119 @@ ORDER BY u.sortable_name;"""
def create_schedule_table_if_not_exists():
conn,cur = db()
# Check if table exists
cur.execute("SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'schedule' and table_schema = 'canvas')")
table_exists = cur.fetchone()[0]
if not table_exists:
# Create table if it doesn't exist
cur.execute("""CREATE TABLE canvas.schedule (
id SERIAL PRIMARY KEY,
canvascourse INTEGER REFERENCES canvas.courses,
crn VARCHAR(20),
code VARCHAR(30),
units VARCHAR(20),
teacher TEXT,
start VARCHAR(30),
"end" VARCHAR(30),
type VARCHAR(20),
loc VARCHAR(80),
site VARCHAR(50),
partofday VARCHAR(40),
cap INTEGER,
act INTEGER,
sem VARCHAR(10)
)
""")
conn.commit()
cur.close()
# Populate schedule table and correlate to courses table
def courses_to_sched():
EXECUTE = 1
seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
seasons2 = {'wi':'10', 'sp':'30', 'su':'50', 'fa':'70'}
create_schedule_table_if_not_exists()
conn,cur = db()
# get all ilearn courses
query = """SELECT c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name
FROM canvas.courses c
JOIN canvas.enrollment_terms t ON c.enrollment_term_id = t.id
ORDER BY c.sis_source_id, c.course_code;"""
cur.execute(query)
sis_to_sched = {}
for row in cur.fetchall():
sis_source_id = row[2] # c.sis_source_id
sis_to_sched.setdefault(sis_source_id, []).append(row)
vals_cache = []
i = 0
for year in ['16','17','18','19','20','21','22','23','24']:
for sem in ['sp','su','fa']:
term = f"{sem}{year}"
sis_code = f"20{year}{seasons2[sem]}"
print(term)
sched = 0
try:
sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
except Exception as e:
print(e)
query = "INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,\"end\", type, loc, site, partofday, cap, act, sem) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
if sched:
for c in sched:
try:
pod = ''
if 'partofday' in c: pod = c['partofday']
full_sis_code = sis_code+'-'+c['crn']
if full_sis_code in sis_to_sched:
q = [sis_to_sched[full_sis_code][0][0], c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, int(c['cap']), int(c['act']), sis_code]
vals_cache.append( q ) # [ str(x) for x in q ] )
print(f"{i}: {q}")
i += 1
if i % 100 == 0:
if EXECUTE:
cur.executemany(query, vals_cache)
conn.commit()
vals_cache = []
t = time.process_time()
delta = t - last_time
last_time = t
print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True)
else:
print(f"{full_sis_code} not in canvas courses.")
except Exception as e:
print(e)
if EXECUTE:
cur.executemany(query, vals_cache)
conn.commit()
cur.close()
conn.close()
if __name__ == "__main__":
print ('')
@ -239,6 +352,7 @@ if __name__ == "__main__":
3: ['pages in term', pages_in_term],
4: ['new students this semester', users_new_this_semester],
5: ['all semester courses + teachers', all_sem_courses_teachers],
6: ['Populate schedule table and correlate to courses table', courses_to_sched],
}

View File

@ -1201,12 +1201,12 @@ def visualize_course_modes_multi_semester():
import plotly.express as px
from plotly.subplots import make_subplots
seasons = {'sp':'30','su':'50','fa':'70'}
semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24".split(" ")
# sems = {'sp23':'202330','su23':'202350','fa23':'202370'}
semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24 su24 fa24".split(" ")
sems = { x:'20' + x[2:] + seasons[x[:2]] for x in semcodes }
sem_dfs = []
sem_dfs_depts = []
for s in sems.keys():
print(f"fetching {s}")
sched = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json").json()
for crs in sched:
if 'extra' in crs: del crs['extra']
@ -1215,11 +1215,10 @@ def visualize_course_modes_multi_semester():
df_depts = df.copy()
df_depts = df_depts.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy'])
df = df.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy'])
dept_counts = grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count')
grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count')
grouped_by_mode = df['type'].value_counts().reset_index()
grouped_by_dept["semester"] = sems[s]
grouped_by_mode["semester"] = sems[s]
#print(dept_counts)
sem_dfs.append(grouped_by_mode)
sem_dfs_depts.append(grouped_by_dept)
@ -1233,16 +1232,20 @@ def visualize_course_modes_multi_semester():
combined_data = pd.concat(sem_dfs, axis=0)
combined_data = combined_data.rename(columns={'type':'count','index':'type'})
#print(combined_data)
#combined_data = combined_data.rename(columns={'type':'count','index':'type'})
#print(combined_data)
combined_data.reset_index(drop=True,inplace=True)
#print(combined_data)
pivoted_data = combined_data.pivot(index='semester', columns='type', values='count')
pivoted_data.reset_index(inplace=True)
fig = px.bar(pivoted_data, x='semester',y=['hybrid', 'in-person', 'online', 'online live'], barmode='stack',
title='Course Delivery by Semester',
color_discrete_sequence=["#000066","#660000","#333366","#9400D3"])
fig.update_layout(height=200*len(fig['data']))
fig.write_html(f"cache/sections_by_deliverymode.html")
print(f"wrote to: cache/sections_by_deliverymode.html")
combined_data_depts = pd.concat(sem_dfs_depts, axis=0)
combined_data_depts.reset_index(drop=True,inplace=True)
@ -1260,19 +1263,28 @@ def visualize_course_modes_multi_semester():
fig = make_subplots(rows=len(unique_depts), cols=1,
subplot_titles=unique_depts,
)
print("\n\nindividual departments\n\n")
for i, dept in enumerate(unique_depts, start=1):
#if i>1: break
# Filter the dataframe for the current department
print(f"{dept}")
dept_data = combined_data_depts[combined_data_depts['dept'] == dept]
# Pivot the data frame
pivoted_dept_data = dept_data.pivot(index='semester', columns='type', values='count').reset_index()
pivoted_dept_data.fillna(0,inplace=True)
print(pivoted_dept_data)
#print(pivoted_dept_data)
# Plot the data
columns_to_plot = ['hybrid', 'in-person', 'online', 'online live']
valid_columns = [col for col in columns_to_plot if col in pivoted_dept_data.columns]
# to avoid futurewarning
# print(f" {valid_columns}")
# if len(valid_columns)==1: valid_columns = valid_columns[0]
# print(f" {valid_columns}")
fig_sub = px.bar(pivoted_dept_data, x='semester', y=valid_columns, barmode='stack',
#title=f'Course Delivery by Semester for {dept}',
color_discrete_sequence=["#000066","#660000","#333366","#9400D3"])

95
useful queries.sql Normal file
View File

@ -0,0 +1,95 @@
select enrollment_term_id ,count(enrollment_term_id) as count from canvas.courses group by enrollment_term_id order by enrollment_term_id desc;
select id, name, sis_source_id from canvas.enrollment_terms order by sis_source_id ;
select c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name from canvas.courses c
join canvas.enrollment_terms t on c.enrollment_term_id=t.id
order by c.sis_source_id, c.course_code;
SELECT * FROM information_schema.tables;
INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,"end", type, loc, site, partofday, cap, act, sem)
VALUES (9651, '40724', 'SPAN 1A', '5.000', 'David G Perez', '1-27', '5-22', 'in-person', 'HU 105', 'Gilroy', 'Midday', 30, 26, '202030');
--
--
-- users in sp24 and number of each type of course taking
--
--
-- one student
select u.sortable_name, c.course_code, s.units, s.type, s.start
from canvas.users u
join canvas.enrollments e on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.schedule s on c.id=s.canvascourse
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' -- and u.sortable_name='Zendejas, Grace'
order by u.sortable_name, s.start, c.course_code ;
-- num courses
select u.sortable_name,
sum(CASE WHEN s.type = 'in-person' THEN 1 ELSE 0 end) AS inperson,
sum(CASE WHEN s.type = 'hybrid' THEN 1 ELSE 0 end) AS hybrid,
sum(CASE WHEN s.type = 'online' THEN 1 ELSE 0 end) AS online,
sum(CASE WHEN s.type = 'online live' THEN 1 ELSE 0 end) AS onlinelive,
count(*) as total
from canvas.users u
join canvas.enrollments e on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.schedule s on c.id=s.canvascourse
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
group by u.sortable_name
order by total desc, online desc, onlinelive desc, hybrid desc;
-- num units
select u.sortable_name, p.sis_user_id,
sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson,
sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid,
sum(CASE WHEN s.type = 'online' THEN s.units::FLOAT ELSE 0 end) AS online,
sum(CASE WHEN s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS onlinelive,
sum(s.units::FLOAT) as total
from canvas.users u
join canvas.enrollments e on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.schedule s on c.id=s.canvascourse
join canvas.pseudonyms p on u.id=p.user_id
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
group by u.sortable_name, p.sis_user_id
order by total desc, online desc, onlinelive desc, hybrid desc;
-- only units
select
sum(s.units::FLOAT) as total
from canvas.users u
join canvas.enrollments e on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.schedule s on c.id=s.canvascourse
join canvas.pseudonyms p on u.id=p.user_id
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
group by u.sortable_name
order by total desc;
-- each class
select u.sortable_name, c.course_code, s.type, s.units::FLOAT
from canvas.users u
join canvas.enrollments e on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.schedule s on c.id=s.canvascourse
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
order by u.sortable_name, c.course_code;