local db and stats by semester
This commit is contained in:
parent
1c3f9dbf10
commit
ef86e22114
114
localcache2.py
114
localcache2.py
|
|
@ -230,6 +230,119 @@ ORDER BY u.sortable_name;"""
|
|||
|
||||
|
||||
|
||||
|
||||
def create_schedule_table_if_not_exists():
|
||||
conn,cur = db()
|
||||
|
||||
# Check if table exists
|
||||
cur.execute("SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'schedule' and table_schema = 'canvas')")
|
||||
table_exists = cur.fetchone()[0]
|
||||
|
||||
if not table_exists:
|
||||
# Create table if it doesn't exist
|
||||
cur.execute("""CREATE TABLE canvas.schedule (
|
||||
id SERIAL PRIMARY KEY,
|
||||
canvascourse INTEGER REFERENCES canvas.courses,
|
||||
crn VARCHAR(20),
|
||||
code VARCHAR(30),
|
||||
units VARCHAR(20),
|
||||
teacher TEXT,
|
||||
start VARCHAR(30),
|
||||
"end" VARCHAR(30),
|
||||
type VARCHAR(20),
|
||||
loc VARCHAR(80),
|
||||
site VARCHAR(50),
|
||||
partofday VARCHAR(40),
|
||||
cap INTEGER,
|
||||
act INTEGER,
|
||||
sem VARCHAR(10)
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
cur.close()
|
||||
|
||||
|
||||
# Populate schedule table and correlate to courses table
|
||||
def courses_to_sched():
|
||||
|
||||
EXECUTE = 1
|
||||
|
||||
seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
|
||||
seasons2 = {'wi':'10', 'sp':'30', 'su':'50', 'fa':'70'}
|
||||
|
||||
create_schedule_table_if_not_exists()
|
||||
conn,cur = db()
|
||||
|
||||
# get all ilearn courses
|
||||
query = """SELECT c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name
|
||||
FROM canvas.courses c
|
||||
JOIN canvas.enrollment_terms t ON c.enrollment_term_id = t.id
|
||||
ORDER BY c.sis_source_id, c.course_code;"""
|
||||
|
||||
cur.execute(query)
|
||||
|
||||
sis_to_sched = {}
|
||||
for row in cur.fetchall():
|
||||
sis_source_id = row[2] # c.sis_source_id
|
||||
sis_to_sched.setdefault(sis_source_id, []).append(row)
|
||||
|
||||
|
||||
vals_cache = []
|
||||
i = 0
|
||||
for year in ['16','17','18','19','20','21','22','23','24']:
|
||||
for sem in ['sp','su','fa']:
|
||||
term = f"{sem}{year}"
|
||||
sis_code = f"20{year}{seasons2[sem]}"
|
||||
print(term)
|
||||
sched = 0
|
||||
try:
|
||||
sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
query = "INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,\"end\", type, loc, site, partofday, cap, act, sem) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
|
||||
|
||||
if sched:
|
||||
for c in sched:
|
||||
try:
|
||||
pod = ''
|
||||
if 'partofday' in c: pod = c['partofday']
|
||||
|
||||
full_sis_code = sis_code+'-'+c['crn']
|
||||
|
||||
if full_sis_code in sis_to_sched:
|
||||
q = [sis_to_sched[full_sis_code][0][0], c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, int(c['cap']), int(c['act']), sis_code]
|
||||
vals_cache.append( q ) # [ str(x) for x in q ] )
|
||||
print(f"{i}: {q}")
|
||||
i += 1
|
||||
if i % 100 == 0:
|
||||
if EXECUTE:
|
||||
cur.executemany(query, vals_cache)
|
||||
conn.commit()
|
||||
vals_cache = []
|
||||
t = time.process_time()
|
||||
delta = t - last_time
|
||||
last_time = t
|
||||
print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True)
|
||||
else:
|
||||
print(f"{full_sis_code} not in canvas courses.")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
if EXECUTE:
|
||||
cur.executemany(query, vals_cache)
|
||||
conn.commit()
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
print ('')
|
||||
|
|
@ -239,6 +352,7 @@ if __name__ == "__main__":
|
|||
3: ['pages in term', pages_in_term],
|
||||
4: ['new students this semester', users_new_this_semester],
|
||||
5: ['all semester courses + teachers', all_sem_courses_teachers],
|
||||
6: ['Populate schedule table and correlate to courses table', courses_to_sched],
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
26
stats.py
26
stats.py
|
|
@ -1201,12 +1201,12 @@ def visualize_course_modes_multi_semester():
|
|||
import plotly.express as px
|
||||
from plotly.subplots import make_subplots
|
||||
seasons = {'sp':'30','su':'50','fa':'70'}
|
||||
semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24".split(" ")
|
||||
# sems = {'sp23':'202330','su23':'202350','fa23':'202370'}
|
||||
semcodes = "sp18 su18 fa18 sp19 su19 fa19 sp20 su20 fa20 sp21 su21 fa21 sp22 su22 fa22 sp23 su23 fa23 sp24 su24 fa24".split(" ")
|
||||
sems = { x:'20' + x[2:] + seasons[x[:2]] for x in semcodes }
|
||||
sem_dfs = []
|
||||
sem_dfs_depts = []
|
||||
for s in sems.keys():
|
||||
print(f"fetching {s}")
|
||||
sched = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json").json()
|
||||
for crs in sched:
|
||||
if 'extra' in crs: del crs['extra']
|
||||
|
|
@ -1215,11 +1215,10 @@ def visualize_course_modes_multi_semester():
|
|||
df_depts = df.copy()
|
||||
df_depts = df_depts.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy'])
|
||||
df = df.drop(columns=['crn','sec','code','cmp','name','days','time','rem','wl_cap','wl_act','wl_rem','teacher','date','loc','ztc','time_start','time_end','start','end','doy'])
|
||||
dept_counts = grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count')
|
||||
grouped_by_dept = df_depts.groupby(['dept','type']).size().reset_index(name='count')
|
||||
grouped_by_mode = df['type'].value_counts().reset_index()
|
||||
grouped_by_dept["semester"] = sems[s]
|
||||
grouped_by_mode["semester"] = sems[s]
|
||||
#print(dept_counts)
|
||||
sem_dfs.append(grouped_by_mode)
|
||||
sem_dfs_depts.append(grouped_by_dept)
|
||||
|
||||
|
|
@ -1233,16 +1232,20 @@ def visualize_course_modes_multi_semester():
|
|||
|
||||
|
||||
combined_data = pd.concat(sem_dfs, axis=0)
|
||||
combined_data = combined_data.rename(columns={'type':'count','index':'type'})
|
||||
#print(combined_data)
|
||||
#combined_data = combined_data.rename(columns={'type':'count','index':'type'})
|
||||
#print(combined_data)
|
||||
combined_data.reset_index(drop=True,inplace=True)
|
||||
#print(combined_data)
|
||||
pivoted_data = combined_data.pivot(index='semester', columns='type', values='count')
|
||||
pivoted_data.reset_index(inplace=True)
|
||||
|
||||
fig = px.bar(pivoted_data, x='semester',y=['hybrid', 'in-person', 'online', 'online live'], barmode='stack',
|
||||
title='Course Delivery by Semester',
|
||||
color_discrete_sequence=["#000066","#660000","#333366","#9400D3"])
|
||||
fig.update_layout(height=200*len(fig['data']))
|
||||
fig.write_html(f"cache/sections_by_deliverymode.html")
|
||||
|
||||
print(f"wrote to: cache/sections_by_deliverymode.html")
|
||||
|
||||
combined_data_depts = pd.concat(sem_dfs_depts, axis=0)
|
||||
combined_data_depts.reset_index(drop=True,inplace=True)
|
||||
|
|
@ -1260,19 +1263,28 @@ def visualize_course_modes_multi_semester():
|
|||
fig = make_subplots(rows=len(unique_depts), cols=1,
|
||||
subplot_titles=unique_depts,
|
||||
)
|
||||
|
||||
print("\n\nindividual departments\n\n")
|
||||
for i, dept in enumerate(unique_depts, start=1):
|
||||
#if i>1: break
|
||||
# Filter the dataframe for the current department
|
||||
print(f"{dept}")
|
||||
dept_data = combined_data_depts[combined_data_depts['dept'] == dept]
|
||||
|
||||
# Pivot the data frame
|
||||
pivoted_dept_data = dept_data.pivot(index='semester', columns='type', values='count').reset_index()
|
||||
pivoted_dept_data.fillna(0,inplace=True)
|
||||
print(pivoted_dept_data)
|
||||
#print(pivoted_dept_data)
|
||||
|
||||
# Plot the data
|
||||
columns_to_plot = ['hybrid', 'in-person', 'online', 'online live']
|
||||
valid_columns = [col for col in columns_to_plot if col in pivoted_dept_data.columns]
|
||||
|
||||
# to avoid futurewarning
|
||||
# print(f" {valid_columns}")
|
||||
# if len(valid_columns)==1: valid_columns = valid_columns[0]
|
||||
# print(f" {valid_columns}")
|
||||
|
||||
fig_sub = px.bar(pivoted_dept_data, x='semester', y=valid_columns, barmode='stack',
|
||||
#title=f'Course Delivery by Semester for {dept}',
|
||||
color_discrete_sequence=["#000066","#660000","#333366","#9400D3"])
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
select enrollment_term_id ,count(enrollment_term_id) as count from canvas.courses group by enrollment_term_id order by enrollment_term_id desc;
|
||||
|
||||
|
||||
select id, name, sis_source_id from canvas.enrollment_terms order by sis_source_id ;
|
||||
|
||||
|
||||
select c.id, c.workflow_state, c.sis_source_id, c.course_code, c.enrollment_term_id, t.name from canvas.courses c
|
||||
join canvas.enrollment_terms t on c.enrollment_term_id=t.id
|
||||
order by c.sis_source_id, c.course_code;
|
||||
|
||||
|
||||
SELECT * FROM information_schema.tables;
|
||||
|
||||
|
||||
INSERT INTO canvas.schedule (canvascourse, crn, code, units, teacher, start,"end", type, loc, site, partofday, cap, act, sem)
|
||||
VALUES (9651, '40724', 'SPAN 1A', '5.000', 'David G Perez', '1-27', '5-22', 'in-person', 'HU 105', 'Gilroy', 'Midday', 30, 26, '202030');
|
||||
|
||||
|
||||
|
||||
|
||||
--
|
||||
--
|
||||
-- users in sp24 and number of each type of course taking
|
||||
--
|
||||
--
|
||||
|
||||
-- one student
|
||||
select u.sortable_name, c.course_code, s.units, s.type, s.start
|
||||
from canvas.users u
|
||||
join canvas.enrollments e on u.id=e.user_id
|
||||
join canvas.courses c on e.course_id=c.id
|
||||
join canvas.schedule s on c.id=s.canvascourse
|
||||
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' -- and u.sortable_name='Zendejas, Grace'
|
||||
order by u.sortable_name, s.start, c.course_code ;
|
||||
|
||||
|
||||
|
||||
-- num courses
|
||||
select u.sortable_name,
|
||||
sum(CASE WHEN s.type = 'in-person' THEN 1 ELSE 0 end) AS inperson,
|
||||
sum(CASE WHEN s.type = 'hybrid' THEN 1 ELSE 0 end) AS hybrid,
|
||||
sum(CASE WHEN s.type = 'online' THEN 1 ELSE 0 end) AS online,
|
||||
sum(CASE WHEN s.type = 'online live' THEN 1 ELSE 0 end) AS onlinelive,
|
||||
count(*) as total
|
||||
from canvas.users u
|
||||
join canvas.enrollments e on u.id=e.user_id
|
||||
join canvas.courses c on e.course_id=c.id
|
||||
join canvas.schedule s on c.id=s.canvascourse
|
||||
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
|
||||
group by u.sortable_name
|
||||
order by total desc, online desc, onlinelive desc, hybrid desc;
|
||||
|
||||
|
||||
|
||||
|
||||
-- num units
|
||||
select u.sortable_name, p.sis_user_id,
|
||||
sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson,
|
||||
sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid,
|
||||
sum(CASE WHEN s.type = 'online' THEN s.units::FLOAT ELSE 0 end) AS online,
|
||||
sum(CASE WHEN s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS onlinelive,
|
||||
sum(s.units::FLOAT) as total
|
||||
from canvas.users u
|
||||
join canvas.enrollments e on u.id=e.user_id
|
||||
join canvas.courses c on e.course_id=c.id
|
||||
join canvas.schedule s on c.id=s.canvascourse
|
||||
join canvas.pseudonyms p on u.id=p.user_id
|
||||
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
|
||||
group by u.sortable_name, p.sis_user_id
|
||||
order by total desc, online desc, onlinelive desc, hybrid desc;
|
||||
|
||||
-- only units
|
||||
select
|
||||
sum(s.units::FLOAT) as total
|
||||
from canvas.users u
|
||||
join canvas.enrollments e on u.id=e.user_id
|
||||
join canvas.courses c on e.course_id=c.id
|
||||
join canvas.schedule s on c.id=s.canvascourse
|
||||
join canvas.pseudonyms p on u.id=p.user_id
|
||||
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
|
||||
group by u.sortable_name
|
||||
order by total desc;
|
||||
|
||||
|
||||
|
||||
|
||||
-- each class
|
||||
|
||||
select u.sortable_name, c.course_code, s.type, s.units::FLOAT
|
||||
from canvas.users u
|
||||
join canvas.enrollments e on u.id=e.user_id
|
||||
join canvas.courses c on e.course_id=c.id
|
||||
join canvas.schedule s on c.id=s.canvascourse
|
||||
where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment'
|
||||
order by u.sortable_name, c.course_code;
|
||||
Loading…
Reference in New Issue