# schedule.py
#
# manipulating and querying the schedule of courses


#from telnetlib import GA
from sqlite3 import Row
import json, re, sys, os, codecs, csv, time, requests
#from tkinter.tix import ROW
from typing import Generator
from pathlib import Path
from bs4 import BeautifulSoup as bs
from datetime import datetime
from fast_autocomplete import AutoComplete
from io import StringIO
from time import strptime
from deepdiff import DeepDiff
from datetime import datetime as dt
from dateutil import parser
from util import fix_t_name, split_class_dept, split_class_code, split_class_code_letter

import pandas as pd

from semesters import short_to_long
from pipelines import put_file

from collections import defaultdict
from semesters import short_to_sis, sis_to_human
from localcache2 import everyone_teacher_role, iLearn_name_from_goo
import funcy

from canvas_secrets import GOO, GOO_PIN

DEBUG = 0


# Tiny debug printer, gated on the module-level DEBUG flag.
def d(s, end=''):
    global DEBUG
    if end and DEBUG: print(s, end=end)
    elif DEBUG: print(s)


##
## DEPTS, GUIDED PATHWAY CLUSTERS, AREAS and DEANS
def campus_dept_hierarchy():
    courses_csv = '''Course,GP,AreaCode
ACCT,info,cwp
AE,skill,cwp
AH,well,nah
AJ,skill,cwp
AMT,skill,cwp
ANTH,soc,ahss
APE,skill,cwp
ART,art,ahss
ASTR,stem,stem
ATH,well,nah
BIO,stem,stem
BIOT,stem,stem
BOT,info,cwp
BUS,info,cwp
CARP,skill,cwp
CD,skill,cwp
CHEM,stem,stem
CHN,comm,ahss
CMGT,skill,cwp
CMUN,comm,ahss
COMM,comm,ahss
COMMC,comm,ahss
COS,skill,cwp
CSIS,stem,cwp
CUL,skill,cwp
CWE,skill,cwp
DE,comm,stem
DM,info,cwp
DRLT,skill,cwp
ECOL,stem,stem
ECON,info,cwp
ENGL,soc,ahss
ENGLC,soc,ahss
ENGR,stem,stem
ENVS,stem,stem
ESL,comm,cwp
ETHN,comm,ahss
FRNH,comm,ahss
GEOG,stem,stem
GEOL,stem,stem
GUID,soc,c
HE,well,nah
HIST,soc,ahss
HUM,soc,ahss
HVAC,skill,cwp
CGD,skill,cwp
JFT,skill,cwp
JLE,skill,cwp
JOUR,comm,ahss
JPN,comm,ahss
KIN,well,nah
LIB,comm,stem
LIFE,well,nah
MATH,stem,stem
STATC,stem,stem
STAT,stem,stem
MCTV,art,ahss
FTVE,art,ahss
MUS,art,ahss
PHIL,soc,ahss
PHYS,stem,stem
POLS,soc,ahss
POLSC,soc,ahss
PSCI,stem,stem
PSYC,soc,ahss
PSYCC,soc,ahss
RE,skill,cwp
SJS,soc,ahss
SOC,soc,ahss
SPAN,comm,ahss
THEA,art,ahss
WELD,skill,cwp
HORT,skill,cwp
WTRM,skill,cwp
MGMT,skill,cwp
MKTG,skill,cwp
HTM,skill,cwp'''

    areas_csv = '''Area,AreaCode,DeanCode,DeanName
Nursing and Allied Health,nah,et,[Dean: Nursing/Allied Health]
Career Education and Workforce Pathways,cwp,vc,Vins Chacko
Arts Humanities and Social Sciences,ahss,nl,[Dean: AHSS]
Counseling,c,de,Diego Espinoza
Student Support and Special Programs,sssp,de,Diego Espinoza
Science Technology Engineering and Mathematics,stem,jn,Jennifer Nari'''

    courses_df = pd.read_csv(StringIO(courses_csv))
    areas_df = pd.read_csv(StringIO(areas_csv))
    #print("areas_df")
    #print(areas_df)

    # Recreate gp dictionary
    course_to_gp = dict(zip(courses_df['Course'], courses_df['GP']))

    # Recreate area dictionary
    course_to_area = dict(zip(courses_df['Course'], courses_df['AreaCode']))

    # Recreate areas dictionary (mapping AreaCode to full Area Name)
    areacode_to_area = dict(zip(areas_df['AreaCode'], areas_df['Area']))

    # Recreate dean dictionary (mapping Course -> DeanCode)
    area_to_dean = dict(zip(areas_df['AreaCode'], areas_df['DeanCode']))
    course_to_dean = {course: area_to_dean[area_code] for course, area_code in course_to_area.items()}

    # Recreate dean_names dictionary
    dean_code_to_name = dict(zip(areas_df['DeanCode'], areas_df['DeanName']))

    # Print samples to verify
    if 0:
        print("course_to_gp:", list(course_to_gp.items())[:5])
        print("course_to_area:", list(course_to_area.items())[:5])
        print("areacode_to_area:", list(areacode_to_area.items())[:5])
        print("area_to_dean:", list(area_to_dean.items())[:5])
        print("course_to_dean:", list(course_to_dean.items())[:5])
        print("dean_code_to_name:", list(dean_code_to_name.items())[:5])

    return (course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name)


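# Example (illustrative sketch): look a department up in the returned maps.
# 'CSIS' sits in guided-pathway cluster 'stem' but reports to the CWP area/dean.
if 0:
    (gp, c2area, area_names, area_dean, c2dean, dean_names) = campus_dept_hierarchy()
    print(gp['CSIS'])                   # stem
    print(area_names[c2area['CSIS']])   # Career Education and Workforce Pathways
    print(dean_names[c2dean['CSIS']])   # Vins Chacko

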
##
## SEMESTER FETCHING
##
##


def scrape_schedule_multi():

    global SEMESTER, short_sem, semester_begin, filename, filename_html

    (gp, course_to_area, areacode_to_area, area_to_dean, dean, dean_code_to_name) = campus_dept_hierarchy()

    SEMESTER = 'Spring 2025'
    short_sem = 'sp25'
    semester_begin = strptime('01/27', '%m/%d')
    filename = 'sp25_sched.json'
    filename_html = 'sp25_sched.html'

    SEM = ['Fall 2022', 'Summer 2022 (View only)', 'Spring 2022 (View only)',
           'Fall 2021 (View only)', 'Summer 2021 (View only)', 'Spring 2021 (View only)', 'Fall 2020 (View only)', 'Summer 2020 (View only)', 'Spring 2020 (View only)',
           'Fall 2019 (View only)', 'Summer 2019 (View only)', 'Spring 2019 (View only)', 'Fall 2018 (View only)', 'Summer 2018 (View only)', 'Spring 2018 (View only)']

    srt = 'fa22,su22,sp22,fa21,su21,sp21,fa20,su20,sp20,fa19,su19,sp19,fa18,su18,sp18'.split(',')
    beg = ['08/22','06/13','01/31','08/23','06/14','02/01','08/24','06/15','01/27','08/26','06/17','01/28','08/27','06/18','01/29']

    # To re-scrape several past semesters, loop over SEM/srt/beg instead:
    #for i in [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]:
    #    SEMESTER = SEM[i]
    #    short_sem = srt[i]
    #    semester_begin = strptime(beg[i], '%m/%d')
    #    filename = '%s_sched.json' % short_sem
    #    filename_html = '%s_sched.html' % short_sem

    as_dict = scrape_schedule(short_sem, SEMESTER)

    expanded = list_latestarts(short_sem)
    fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")

    ffcsv = codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8')
    with ffcsv as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)

        for S in expanded:
            parts = S['code'].split(' ')
            S['dept'] = parts[0]
            S['num'] = parts[1]
            S['gp'] = gp[parts[0]]
            S['dean'] = dean[parts[0]]
            S['sem'] = short_sem
            # S['act'] = S['cap']
            if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
            csvwriter.writerow([S[x] for x in fields])

    put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)


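# Illustrative sketch of one row in enrollment_<sem>.csv (values are made up):
#
#   gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type
#   stem,vc,CSIS,42,CSIS 42,31234,Doe Jane,Intro to Programming,28,35,Gilroy,online

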
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term="fa23"):

    show_summary = 1

    the_year = '20' + term[2:4]
    print("year: ", the_year, " semester: ", term)

    #term_in = "cache/%s_sched.json" % term
    term_out = "cache/%s_latestarts.txt" % term
    expanded_out = "%s_sched_expanded.json" % term
    print("Writing output to " + term_out)
    #infile = codecs.open(term_in, "r", "utf-8")
    outfile = codecs.open(term_out, "w", "utf-8")
    exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
    expanded = []
    #sched = json.loads(infile.read())

    sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched.json").json()
    #print(sched)
    by_date = {}

    if show_summary: print("course \t loc \t type \t time")

    for C in sched:
        if (not C['type']) and C['loc'] != 'ONLINE':  # and C['time']:
            C['type'] = 'in-person'

        if show_summary: print("%s \t %s \t %s \t %s" % (C['code'], C['loc'], C['type'], C['time']))

        if 'extra' in C:
            if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
                C['type'] = 'hybrid'

        times = C['time'].split("-")
        if len(times) > 1:
            time_start = times[0]
            time_end = times[1]

            try:
                startt = time.strptime(time_start, "%I:%M %p")
                endt = time.strptime(time_end, "%I:%M %p")
                min_start = startt.tm_min
                min_end = endt.tm_min
                if min_start == 0: min_start = "00"
                else: min_start = str(min_start)
                if min_end == 0: min_end = "00"
                else: min_end = str(min_end)
                C['time_start'] = "%i:%s" % (startt.tm_hour, min_start)
                C['time_end'] = "%i:%s" % (endt.tm_hour, min_end)
                if 0:
                    print("+ Parsed %s into %s and %s." % (C['time'], C['time_start'], C['time_end']))
            except Exception as e:
                print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
        else:
            C['time_start'] = ''
            C['time_end'] = ''

        if re.search('TBA', C['date']):
            C['start'] = ''
            C['end'] = ''
            C['doy'] = ''
            expanded.append(C)
            continue

        parts = C['date'].split("-")
        start = parts[0] + "/" + the_year
        end = parts[1] + "/" + the_year

        try:
            startd = parser.parse(start)
            endd = parser.parse(end)
            C['start'] = "%i-%i" % (startd.month, startd.day)
            C['end'] = "%i-%i" % (endd.month, endd.day)
            C['doy'] = startd.timetuple().tm_yday
            expanded.append(C)
        except Exception as e:
            print(e, "\n-- problem parsing ", start, " or ", end)
            continue  # no usable start date, so skip the by_date tally for this row

        if startd not in by_date:
            by_date[startd] = []
        by_date[startd].append(C)

    exoutfile.write(json.dumps(expanded, indent=2))
    exoutfile.close()
    put_file('/home/public/schedule/', 'cache/', expanded_out, 0)

    for X in sorted(by_date.keys()):
        #print("Start: ", X)
        if len(by_date[X]) < 200:
            prettydate = X.strftime("%A, %B %d")
            #print(prettydate + ": " + str(len(by_date[X])) + " courses")
            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
            for Y in by_date[X]:
                #print("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'])
                #print(Y)
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
    outfile.close()
    put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
    return expanded


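# A record in <term>_sched_expanded.json gains these derived fields
# (illustrative values, not from a real row):
#
#   "time_start": "9:45", "time_end": "11:10",   # clock times split out of 'time'
#   "start": "8-26", "end": "12-16",             # month-day pairs from 'date'
#   "doy": 238                                   # day-of-year of the start date

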
# Schedule / course filling history
# csv headers: crn, code, teacher, datetime, cap, act, wlcap, wlact
# Log the history of enrollments per course during registration
def log_section_filling(current_sched_list, short_sem):
    rows = 'timestamp crn code teacher cap act wl_cap wl_act'.split(' ')   # full header row
    rows_j = 'crn code teacher cap act wl_cap wl_act'.split(' ')           # keys pulled from each section
    print(rows_j)
    now = datetime.now().strftime('%Y-%m-%dT%H-%M')
    csv_fn = 'cache/reg_history_' + short_sem + '.csv'
    with codecs.open(csv_fn, 'a', 'utf-8') as f:
        writer = csv.writer(f)
        for S in current_sched_list:
            #print(S)
            items = [now,]
            items.extend(S[X] for X in rows_j)
            writer.writerow(items)


# Same as above, but compressed: one 'act' column per scrape
def log_section_filling2(current_sched_list, short_sem):

    now = datetime.now().strftime('%Y-%m-%dT%H')

    todays_data = { int(S['crn']): S['act'] for S in current_sched_list }
    #print(todays_data)

    todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
    todays_df = todays_df.rename_axis('crn')
    todays_df.index = todays_df.index.astype(str)
    #print(todays_df)
    todays_df.to_csv('cache/reg_today_new.csv', index=True)

    csv_path = Path('cache') / f'reg_data_{short_sem}.csv'
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        myframe = pd.read_csv(csv_path)
        print(myframe)
    except FileNotFoundError:
        myframe = pd.DataFrame(columns=['crn'])
        print("Creating new data file for this semester.")
    except pd.errors.EmptyDataError:
        myframe = pd.DataFrame(columns=['crn'])
        print("Existing data file was empty; starting fresh for this semester.")

    if 'crn' in myframe.columns:
        myframe = myframe.set_index('crn')
    else:
        myframe = myframe.rename_axis('crn')
    myframe.index = myframe.index.astype(str)
    combined_df = myframe.reindex(myframe.index.union(todays_df.index))
    combined_df[now] = todays_df[now]
    combined_df = combined_df.sort_index()
    combined_df = combined_df.reset_index()
    combined_df = combined_df.fillna('')
    print(combined_df)

    reg_data_filename = 'reg_data_' + short_sem + '.csv'
    tmp_path = csv_path.with_suffix(csv_path.suffix + '.tmp')
    combined_df.to_csv(tmp_path, index=False)
    tmp_path.replace(csv_path)
    put_file('/home/public/schedule/', 'cache/', reg_data_filename, 0)


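# The resulting reg_data_<sem>.csv is a wide table: one row per CRN, one
# column per scrape timestamp (illustrative values):
#
#   crn,2025-01-06T09,2025-01-07T09,2025-01-08T09
#   31234,12,15,19
#   31235,30,31,31

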
# Use Firefox and log in to ssb and get full schedule. Only works where selenium is installed
def scrape_schedule(short_sem, semester_form_text="SPRING 2025"):

    filename = f"{short_sem}_sched.json"
    filename_html = f"{short_sem}_sched.html"

    #url = "https://ssb.gavilan.edu/prod/twbkwbis.P_GenMenu?name=bmenu.P_StuMainMnu"
    url = "https://ssb-prod.ec.gavilan.edu/PROD/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"

    text = ''

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import WebDriverWait, Select
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC

    try:
        # Selenium 4 locator style: find_element(By..., ...) replaces the
        # removed find_element_by_* helpers.
        driver = webdriver.Firefox()
        driver.get(url)
        driver.find_element(By.ID, "UserID").clear()
        driver.find_element(By.ID, "UserID").send_keys(GOO)
        driver.find_element(By.NAME, "PIN").send_keys(GOO_PIN)
        driver.find_element(By.NAME, "loginform").submit()
        driver.implicitly_wait(5)

        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Students").click()
        driver.implicitly_wait(5)
        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Registration").click()
        driver.implicitly_wait(5)
        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Search for Classes").click()
        driver.implicitly_wait(15)
        print(driver.title)

        dd = Select(driver.find_element(By.NAME, "p_term"))
        if dd:
            dd.select_by_visible_text(semester_form_text)
        driver.find_element(By.XPATH, "/html/body/div/div[4]/form").submit()
        driver.implicitly_wait(15)
        print(driver.title)

        driver.find_element(By.XPATH, "/html/body/div/div[4]/form/input[18]").click()
        driver.implicitly_wait(10)
        print(driver.title)

        driver.find_element(By.NAME, "SUB_BTN").click()
        driver.implicitly_wait(40)
        time.sleep(15)
        driver.implicitly_wait(40)
        print(driver.title)
        text = driver.page_source
        driver.quit()

    except Exception as e:
        print("Got an exception: ", e)
    finally:
        print("")
        #driver.quit()

    codecs.open('cache/' + filename_html, 'w', 'utf-8').write(text)

    #print(text)
    as_list = ssb_to_csv(text)
    #print(as_list)
    as_dict = to_section_list(as_list)
    jj = json.dumps(as_dict, indent=2)

    # TODO
    try:
        ps = codecs.open('cache/' + filename, 'r', 'utf-8')
        prev_sched = json.loads(ps.read())
        ps.close()

        if 1:  # sometimes I want to re-run this without affecting the logs.
            log_section_filling(as_dict, short_sem)
            log_section_filling2(as_dict, short_sem)

        dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
        pretty_json = json.dumps(json.loads(dd.to_json()), indent=2)
        codecs.open('cache/%s_sched_diff.json' % short_sem, 'w', 'utf-8').write(pretty_json)

    except Exception as e:
        print(e)
        print("Can't do diff?")

    # Next, rename the prev sched_xxYY.json data file to have its date,
    # make this new one, and then upload it to the website.
    # Maybe even count the entries and do a little sanity checking
    #
    # print("Last modified: %s" % time.ctime(os.path.getmtime("test.txt")))
    # print("Created: %s" % time.ctime(os.path.getctime("test.txt")))

    try:
        last_mod = time.ctime(os.path.getmtime('cache/' + filename))

        prev_stat = Path('cache/' + filename).stat()
        mtime = dt.fromtimestamp(prev_stat.st_mtime)
        print(mtime)
    except Exception:
        print("Couldn't stat the previous schedule file.")
    # fname = pathlib.Path('test.py')
    # assert fname.exists(), f'No such file: {fname}'  # check that the file exists
    # print(fname.stat())
    #
    # os.stat_result(st_mode=33206, st_ino=5066549581564298, st_dev=573948050, st_nlink=1, st_uid=0, st_gid=0, st_size=413,
    #   st_atime=1523480272, st_mtime=1539787740, st_ctime=1523480272)

    codecs.open('cache/' + filename, 'w', 'utf-8').write(jj)

    put_file('/home/public/schedule/', 'cache/', filename, 0)  # /gavilan.edu/_files/php/

    return as_dict


def dza_sched():
    text = codecs.open('cache/sched_fa22_deanza.html','r','utf-8').read()
    as_list = ssb_to_csv(text)
    #print(as_list)
    as_dict = to_section_list(as_list)
    jj = json.dumps(as_dict, indent=2)
    codecs.open('cache/fa22_sched_deanza.json','w','utf-8').write(jj)


# recreate schedule json files with most current online schedule format.
def recent_schedules():
    # todo: sems is a global in this file. Is that the right thing to do?
    #all_scheds = [ os.listdir( 'cache/rosters/' + short_to_long(s)) for s in sems ]
    #for i,s in enumerate(sems):
    for s in ['sp21',]:
        filename = 'cache/sched_' + s + '.html'
        print("Filename is %s" % filename)
        src = codecs.open(filename, 'r', 'utf-8').read()   # renamed so we don't shadow builtin input()
        output = ssb_to_csv(src)

        csv_fn = 'cache/temp_sched_' + s + '.csv'
        if os.path.isfile(csv_fn):
            os.remove(csv_fn)

        codecs.open(csv_fn,'w','utf-8').write(output)

        jsn = to_section_list(output)
        jsn_fn = 'cache/semesters/' + short_to_long(s) + '/' + s + '_sched.json'
        if os.path.isfile(jsn_fn):
            os.remove(jsn_fn)
        codecs.open(jsn_fn,'w').write(json.dumps(jsn))
    print("I put the most recent schedule JSON files in ./cache/semesters/... folders.")


# Take banner's html and make a csv-formatted string
def ssb_to_csv(src):
    #out = codecs.open(schedfile,'w','utf-8')
    output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
    b = bs(src, 'html.parser')
    tab = b.find(class_="datadisplaytable")
    if not tab:
        print("hmm... didn't find a 'datadisplaytable' in this html: ")
        #print(src)
        return output   # header only, so callers still get parseable csv (an empty section list)
    rows = tab.find_all('tr')
    drows = list(filter(row_has_data, rows))
    for dd in drows:
        t = row_text(dd)
        output += t
    return output


# take text lines and condense them to one dict per section
def to_section_list(input_text, verbose=0):
    this_course = ''
    #todo: no output files
    #jout = codecs.open(filename, 'w', 'utf-8')
    #input = csv.DictReader(open(schedfile,'r'))
    #input = UnicodeDictReader(input_text.splitlines())
    all_courses = []

    try:
        f = StringIO(input_text)
    except Exception:
        print("ERROR with this input_text:")
        print(input_text)
        return all_courses
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    for r in reader:
        d = dict(list(zip(headers, r)))
        #pdb.set_trace()
        # clean funny unicode char in blank entries
        r = {k: clean_funny2(v) for k, v in list(d.items())}
        if verbose: print("Cleaned: " + str(r))

        if 'time' in r:
            if r['time'] == 'TBA': r['time'] = ''
            if r['time']: r['partofday'] = time_to_partofday(r['time'])

        r['type'] = ''

        if 'loc' in r:
            if r['loc'] == 'ONLINE': r['type'] = 'online'
            if r['loc'] == 'ONLINE' and r['time']: r['type'] = 'online live'
            if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
            if r['loc']: r['site'] = room_to_site(r['loc'], verbose)

        if 'code' in r:
            if re.search(r'ONLINE\sLIVE', r['code']):
                r['type'] = 'online live'
            elif re.search(r'ONLINE', r['code']):
                r['type'] = 'online'

        # a row with a CRN starts a new section; a row without one is an
        # extra meeting line belonging to the previous section
        if r['crn']:
            if verbose: print("  it's a new section.")
            if this_course:
                if not this_course['extra']: this_course.pop('extra', None)
                all_courses.append(this_course)
            this_course = r
            #print(r['name'])
            this_course['extra'] = []
        else:
            # is a continuation line
            if verbose: print("  additional meeting: " + str(r))
            for k, v in list(r.items()):
                if not v: r.pop(k, None)
            # TODO: if extra line is different type?
            #if this_course['type']=='online' and r['type'] != 'online': this_course['type'] = 'hybrid'
            #elif this_course['type']!='online' and r['type'] == 'online': this_course['type'] = 'hybrid'
            this_course['extra'].append(r)

    # don't drop the final section accumulated in the loop
    if this_course:
        if not this_course['extra']: this_course.pop('extra', None)
        all_courses.append(this_course)
    return all_courses


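# Sketch of the condensation (illustrative csv lines, not real data):
#
#   31234,CSIS 42,...           -> new dict, crn 31234
#   ,,,,...,MHG 113,...         -> no crn: appended to that dict's 'extra' list
#
# so one section with two meeting rows becomes a single dict whose 'extra'
# holds the second meeting.

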
##
## SCHEDULE PARSE HELPERS
##
##
def time_to_partofday(t):
    #todo: account for multiple sites/rows
    # e.g. '11:20 am-12:10 pm'
    mor = strptime('12:00 PM', '%I:%M %p')
    mid = strptime( '2:00 PM', '%I:%M %p')
    aft = strptime( '6:00 PM', '%I:%M %p')
    if t == 'TBA':
        return 'TBA'
    t = t.upper()
    parts = t.split('-')
    try:
        begin = strptime(parts[0], '%I:%M %p')
        end = strptime(parts[1], '%I:%M %p')
        if end > aft:
            return "Evening"
        if end > mid:
            return "Afternoon"
        if end > mor:
            return "Midday"
        return "Morning"
        #return begin,end
    except Exception:
        #print('problem parsing: ', t)
        return ""


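# Quick checks (classification keys off the *end* time):
#   time_to_partofday('11:20 am-12:10 pm')  -> 'Midday'
#   time_to_partofday('8:00 am-9:50 am')    -> 'Morning'
#   time_to_partofday('5:00 pm-7:50 pm')    -> 'Evening'
#   time_to_partofday('TBA')                -> 'TBA'

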
# Deduce a 'site' field, based on room name and known offsite locations
def room_to_site(room, verbose=0):
    #todo: account for multiple sites/rows
    #todo: better way to store these offsite labels
    othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT,'.split(',')
    # is it gilroy, mh, hol, other, online or hybrid?
    site = 'Gilroy'
    #if len(course[0]) > 13:
    #    room = course[0][13]
    if room in othersites:
        site = "Other"
    if room == 'TBA':
        site = 'TBA'
    if room == 'AV':
        site = 'San Martin Airport'
    if re.search('MHG', room):
        site = 'Morgan Hill'
    if re.search('HOL', room):
        site = 'Hollister'
    if re.search('COY', room):
        site = 'Coyote Valley'
    if re.search('OFFSTE', room):
        site = 'Other'
    if re.search('ONLINE', room):
        site = 'Online'
    if verbose: print(room, '\t', end=' ')
    return site


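# Quick checks (default is the main Gilroy campus):
#   room_to_site('MHG 113')  -> 'Morgan Hill'
#   room_to_site('HOL 101')  -> 'Hollister'
#   room_to_site('ONLINE')   -> 'Online'
#   room_to_site('HU 104')   -> 'Gilroy'

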
def row_has_data(r):  # helper
    if r.find_all('th'):
        return False
    if len(r.find_all('td')) > 2:
        return True
    if re.search(r'Note\:', r.get_text()):
        return True
    return False


def row_text(r):  # helper
    #global dbg

    d("Row Txt Fxn gets: ")
    arr = []
    for t in r.find_all('td'):
        # a cell whose first child is an <img> is recorded as a "1"
        if t.contents and len(t.contents) and t.contents[0].name == 'img':
            arr.append("1")
            d("img")
        r_text = t.get_text()
        arr.append(r_text)
        if 'colspan' in t.attrs and t['colspan'] == '2':
            d('[colspan2]')
            arr.append('')
        d("\t" + r_text, end=" ")
    d('')

    if len(arr) == 1 and re.search(r'Note\:', arr[0]):
        note_line = clean_funny(arr[0])
        note_line = re.sub(r'\n', ' ', note_line)
        note_line = re.sub(r'"', '', note_line)
        #note_line = re.sub(r',', '\,', note_line)
        return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
    del arr[0]
    arr[1] = clean_funny(arr[1])
    arr[2] = clean_funny(arr[2])
    if arr[1]: arr[1] = arr[1] + " " + arr[2]   # join subject and number into one code
    del arr[2]
    arr = [re.sub(r'  +', ' ', a) for a in arr]   # collapse runs of spaces
    arr = [re.sub(',', '. ', a) for a in arr]
    arr = [re.sub(r'\(P\)', '', a) for a in arr]
    arr = [a.strip() for a in arr]
    #del arr[-1]
    r = ','.join(arr) + '\n'
    r = re.sub(r'\n', '', r)
    r = re.sub('add to worksheet', '', r)
    d("Row Txt Fxn returns: " + r + "\n\n")

    return r + '\n'


def clean_funny(s):
    # py3: compare the character directly (the old .encode('utf8') test never matched)
    if s and s == '\xa0': return ''
    return s

def clean_funny2(s):
    if s and s == '\xa0': return ''
    if s and s == ' ': return ''
    return s

def clean_funny3(s):
    return re.sub('\xa0', '', s)


# Go to the semesters folder and read the schedule. Return dataframe
def get_semester_schedule(short='sp21'):  # I used to be current_schedule
    # todo: Some semesters have a different format.... partofday type site xxx i just dL'd them again

    filename = 'cache/semesters/' + short_to_long(short) + '/' + short + '_sched.json'
    print("opening %s" % filename)
    #openfile = open(filename,'r')
    #a = json.loads(openfile)
    #return pd.DataFrame(a)
    schedule = pd.read_json(filename)
    schedule.teacher = schedule['teacher'].apply(fix_t_name)
    #print(schedule['teacher'])

    # split ' . '-separated co-teachers: the first name stays on the original
    # row, and each additional teacher gets a duplicated row appended after
    # the loop (appending to a fixed label like loc[-1] kept only the last one)
    extra_rows = []
    for index, r in schedule.iterrows():
        tch = r['teacher']
        parts = tch.split(' . ')
        if len(parts) > 1:
            #print("Multiple teachers: (" + tch + ")")
            schedule.loc[index, 'teacher'] = parts[0]
            for t in parts[1:]:
                r2 = r.copy()
                r2['teacher'] = t
                extra_rows.append(r2)
    if extra_rows:
        schedule = pd.concat([schedule, pd.DataFrame(extra_rows)], ignore_index=True)

    schedule = schedule.assign(dept=schedule['code'].apply(split_class_dept))
    schedule = schedule.assign(codenum=schedule['code'].apply(split_class_code))
    schedule = schedule.assign(codeletter=schedule['code'].apply(split_class_code_letter))
    #print(schedule)
    schedule['sem'] = short
    #print(schedule.columns)
    return schedule


##
## CMDLINE INTERACTIVE SEARCH
##
##

course_types = {'in-person': 'IP', 'hybrid': 'H', 'online': 'O', 'online live': 'OL'}


def course_to_string(crs):

    # drop a middle name so the teacher fits the column
    if len(crs['teacher'].split()) == 3:
        crs['teacher'] = crs['teacher'].split()[0] + " " + crs['teacher'].split()[2]
    # crn type loc days start end cred act/cap code name teacher date
    lengths = [5, 3, 7, 5, 6, 6, 4, 9, 13, 35, 25, 10]
    items = [crs[x] for x in 'crn,type,loc,days,time_start,time_end,cred,act,cap,code,name,teacher,date'.split(',')]
    items[1] = course_types[items[1]]
    if items[2] in ["ONLINE", "ONLINE LIVE"]: items[2] = ''
    items[6] = items[6][0:3]
    items[7] = f"{items[7]}/{items[8]}"                          # enrolled/capacity
    if int(crs['wl_act']) != 0: items[7] += f"+{crs['wl_act']}"  # waitlist tally
    items.pop(8)
    result = " ".join(f"{str(val):{width}}" for val, width in zip(items, lengths))
    return result


def parse_days(str_days):
    # return a list with actual day of week names
    days = []
    if 'M' in str_days:
        days.append('monday')
    if 'T' in str_days:
        days.append('tuesday')
    if 'W' in str_days:
        days.append('wednesday')
    if 'R' in str_days:
        days.append('thursday')
    if 'F' in str_days:
        days.append('friday')
    if 'S' in str_days:
        days.append('saturday')
    return days


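# Banner uses one letter per weekday, with R for Thursday:
#   parse_days('MWF')  -> ['monday', 'wednesday', 'friday']
#   parse_days('TR')   -> ['tuesday', 'thursday']

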
def parse_courses(filename):
    with open(filename) as f:
        courses = json.load(f)

    depts, crns, codes, coursenames, locations, teachers, days, now = {}, {}, {}, {}, {}, {}, {}, datetime.now()
    weekdays = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

    for course in courses:
        #print(course)

        dept = course['code'].split()[0]
        depts.setdefault(dept, []).append(course)

        # Add course to teachers dict
        course['teacher'] = re.sub(r'\s+', ' ', course['teacher'])
        teachers.setdefault(course['teacher'], []).append(course)

        # add course to codes, crns, coursenames dict
        codes.setdefault(course['code'], []).append(course)
        crns.setdefault(course['crn'], []).append(course)
        coursenames.setdefault(course['name'], []).append(course)

        if course['type'] != 'in-person': continue
        # Add course to locations dict
        locations.setdefault(course['loc'], []).append(course)

        # Add course to days dict
        for day in parse_days(course['days']):
            days.setdefault(day, []).append(course)

        # Check if course is happening now
        if course['time_start'] == '': continue
        if course['time_end'] == '': continue
        start_time = datetime.strptime(course['time_start'], '%H:%M')
        end_time = datetime.strptime(course['time_end'], '%H:%M')
        #if start_time.time() <= now.time() <= end_time.time():
        #    print(f"{course['code']} is happening now in {course['loc']}")

    return depts, crns, codes, coursenames, locations, teachers, days


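# Usage sketch (assumes an expanded-schedule json like the ones written above):
if 0:
    depts, crns, codes, names, locs, teachers, days = parse_courses('cache/sample_semester.json')
    for course in depts.get('CSIS', []):      # all CSIS sections
        print(course_to_string(course))
    print(len(days.get('monday', [])), "in-person sections meet on Mondays")

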
def write_at(row, col, text):
    # ANSI escape: ESC[row;colH moves the cursor before writing
    sys.stdout.write(f"\033[{row};{col}H{text}")
    sys.stdout.flush()

def write_search_results(res, columns, rows, maximum=8):
    height = max(len(res), maximum)
    while len(res) < height:
        res.append('')
    for i, L in enumerate(res):
        write_at(rows - i - 1, 1, f"{L}                              ")  # trailing pad overwrites leftovers

    write_at(rows, 0, '')

def write_cleared_search_results(columns, rows, maximum=8):
    for i in range(maximum):
        write_at(rows - i, 1, "                                                  ")
    write_at(rows, 0, '')


'''examples = 0
if examples:
    # Get schedule for a specific room on a specific day
    room = 'HU 104'
    day = 'monday'
    print(f"\nSchedule for {room} on {day}:")
    for course in locations[room]:
        if day in parse_days(course['days']):
            print(" " + course_to_string(course))

    # Get weekly schedule for a specific teacher
    teacher = 'Kimberly J Smith'
    print(f"\nWeekly schedule for {teacher}:")
    for course in teachers[teacher]:
        print(" " + course_to_string(course))'''


def interactive(allkeys, deptkeys, depts, teacherkeys, teachers, locationkeys, locations, coursenameskeys, coursenames, codeskeys, codes, crnskeys, crns):
    import sys
    columns, rows = os.get_terminal_size()

    def getch():
        # Unix
        if sys.platform != 'win32':
            import tty, termios
            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                ch = sys.stdin.read(1)
            finally:
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
            return ch

        # Windows
        else:
            import msvcrt
            return msvcrt.getch().decode('utf-8')

    words = {x: {} for x in allkeys.keys()}
    autocomplete = AutoComplete(words=words)

    # get search term
    query = ''
    results = []
    while True:
        char = getch()
        #print(repr(char))
        #print("\n\n\n\n\n\n\n\n")
        if char in ('\x08', '\x7f'):   # backspace (BS on Windows, DEL in raw Unix mode)
            query = query[:-1]
        elif char == '\r':
            if query == 'quit': return False
            break
        else:
            query += char
        results = [x[0] for x in autocomplete.search(word=query, max_cost=3, size=5)]
        print(query + " ", end='', flush=True)
        write_search_results(results, columns, rows)

    write_cleared_search_results(columns, rows)
    print()
    print()
    if not results:
        return
    for (keyset, dataset) in [(deptkeys, depts), (teacherkeys, teachers), (locationkeys, locations), (coursenameskeys, coursenames), (codeskeys, codes), (crnskeys, crns)]:
        if results[0] in keyset:
            real_key = keyset[results[0]]
            if real_key in dataset.keys():
                print(f"\nWeekly schedule for {real_key}:")
                for course in dataset[real_key]:
                    print(" " + course_to_string(course))


def interactive_schedule_search():
    while True:
        # Read in schedule
        depts, crns, codes, coursenames, locations, teachers, days = parse_courses('cache/sample_semester.json')

        deptkeys = {x.lower(): x for x in depts.keys()}
        teacherkeys = {x.lower(): x for x in teachers.keys()}
        locationkeys = {x.lower(): x for x in locations.keys()}
        coursenameskeys = {x.lower(): x for x in coursenames.keys()}
        codeskeys = {x.lower(): x for x in codes.keys()}
        crnskeys = {x.lower(): x for x in crns.keys()}

        # also allow "last, first" lookups for teachers
        lname_first = [x.split() for x in teachers.keys()]
        d_lnf = {x[-1].lower() + ", " + ' '.join(x[:-1]).lower(): ' '.join(x) for x in lname_first}
        teacherkeys.update(d_lnf)

        allkeys = {}
        allkeys.update(deptkeys)
        allkeys.update(teacherkeys)
        allkeys.update(locationkeys)
        allkeys.update(coursenameskeys)
        allkeys.update(codeskeys)
        allkeys.update(crnskeys)

        print("\nEnter your query or 'quit': ")
        if interactive(allkeys, deptkeys, depts, teacherkeys, teachers, locationkeys, locations, coursenameskeys, coursenames, codeskeys, codes, crnskeys, crns) == False: break


## Download all schedules or just most recent

def download_fresh_schedules():
    loc = 'cache/schedules'
    folder_path = Path(loc)
    folder_path.mkdir(parents=True, exist_ok=True)

    from semesters import sems_by_short_name

    for short, sem in sems_by_short_name.items():
        try:
            print(sem["name"])
            sched = requests.get(f"http://gavilan.cc/schedule/{short}_sched_expanded.json")
            if sched.status_code != 200:
                print("  not found")
                continue
            schedfile = codecs.open(f"{loc}/{short}_sched_expanded.json", "w", "utf-8")
            schedfile.write(sched.text)
            schedfile.close()
        except Exception as e:
            print(e)


# clean leading, multiple, and trailing spaces
def clean_name(t):
    t = t.strip()
    t = re.sub(r'\s+', ' ', t)
    return t


# given their name, a list of courses, provide a summary of some stats
def summarize_teacher(who, what):
    tally_course_code = defaultdict(int)
    tally_mode = defaultdict(int)
    tally_site = defaultdict(int)

    tally_depts = defaultdict(int)

    num_sections = 0
    which_semesters = defaultdict(int)

    all_sems = funcy.pluck("sem", what)
    all_sems_sis = [short_to_sis(x) for x in all_sems]
    all_sems_sis = list(set(all_sems_sis))
    all_sems_sis.sort()
    oldest = sis_to_human(all_sems_sis[0])
    newest = sis_to_human(all_sems_sis[-1])

    for crs in what:
        tally_course_code[crs['code']] += 1
        tally_mode[crs['type']] += 1
        tally_site[crs['site']] += 1
        num_sections += 1
        which_semesters[crs['sem']] += 1

        try:
            c_parts = crs['code'].split(' ')
            dept = c_parts[0]
            tally_depts[dept] += 1
        except Exception:
            pass
    # the teacher's two most-taught departments
    top_two = sorted(tally_depts, key=tally_depts.get, reverse=True)[:2]
    d1 = top_two[0] if top_two else ''
    d2 = ''
    if len(top_two) > 1: d2 = top_two[1]

    training = json.loads(codecs.open('cache/gott_by_goo.json', 'r', 'utf-8').read())

    my_training = {}
    if who in training:
        my_training = training[who]

    # num_years assumes three primary terms per year
    return {'name': who, 'num_sections': num_sections, 'num_semesters': len(which_semesters.keys()), 'num_years': int(len(which_semesters.keys()) / 3),
            'earliest_sem': oldest, 'most_recent': newest,
            'courses': tally_course_code,
            'training': my_training,
            'modes': tally_mode, 'sites': tally_site, 'dept': d1, 'dept2': d2}


def find_goo_for_name(name_dict, name):
    name = clean_name(name)
    if name in name_dict:
        if name_dict[name]:
            print(f"ok {name}")
            return name_dict[name]
    # retry without a middle name
    parts = name.split(' ')
    if len(parts) == 3:
        new_name = f"{parts[0]} {parts[2]}"
        if new_name in name_dict and name_dict[new_name]:
            print(f"ok {new_name}")
            return name_dict[new_name]
    print(f"*** no goo number for {name}.")
    return ''


# All teachers' teaching history
# need schedule files to exist and be current here:
#   \canvasapp\cache\schedules
def teachers_history():

    # Let's just make a giant dict of names we know about -> G numbers
    big_name_to_goo = defaultdict(str)

    # all ilearn accounts with gavilan.edu address
    ilearn_names = json.loads(codecs.open('cache/ilearn_staff.json', 'r', 'utf-8').read())
    for iln in ilearn_names:
        try:
            big_name_to_goo[iln['name']] = iln['sis_user_id']
        except Exception:
            pass

    # everyone who's been in teacher role [ name, id, goo, created dt, coursename]
    all_teacher_role = everyone_teacher_role()
    for atr in all_teacher_role:
        big_name_to_goo[atr[0]] = atr[2]
    teachers_by_id = {str(x[1]): [x[0], x[2], x[3]] for x in all_teacher_role}

    # names that don't match
    non_matching_names = [line.strip().split(',') for line in codecs.open('cache/sched_name_to_ilearn_id.csv', 'r', 'utf-8').readlines()]
    for nmn in non_matching_names:
        try:
            big_name_to_goo[nmn[0]] = teachers_by_id[nmn[3]][1]
        except Exception:
            pass
        try:
            big_name_to_goo[nmn[1]] = teachers_by_id[nmn[3]][1]
        except Exception:
            pass

    #print( sorted(list(big_name_to_goo.keys())) )

    # we want canonical names also
    big_goo_to_name = {}
    for nnn, goo in big_name_to_goo.items():
        big_goo_to_name[goo] = nnn

    # Define the directory to search
    folder_path = Path("cache/schedules")

    all_sections_by_goo = defaultdict(list)

    # Define the regex pattern
    pattern = re.compile(r"(\w\w\d\d)_sched_expanded\.json")

    i = 0

    # Iterate through all saved schedule json files
    for file in folder_path.iterdir():
        if file.is_file() and pattern.match(file.name):
            m = pattern.match(file.name)
            sem = m.groups()[0]
            print(sem)
            with file.open("r", encoding="utf-8") as f:
                data = json.load(f)
            for c in data:
                c['sem'] = sem
                multiples = c['teacher'].split(' . ')
                i += len(multiples)
                if len(multiples) > 1:
                    for one_name in multiples:   # renamed so we don't shadow the regex match 'm'
                        cn = clean_name(one_name)
                        goo = find_goo_for_name(big_name_to_goo, cn)
                        all_sections_by_goo[goo].append(c)
                        #print(f"{sem}\t{cn}\t{goo}\t{c['code']}")
                else:
                    cn = clean_name(c['teacher'])
                    goo = find_goo_for_name(big_name_to_goo, cn)
                    all_sections_by_goo[goo].append(c)
                    #print(f"{sem}\t{cn}\t{goo}\t{c['code']}")

    #print(json.dumps(teacher_names_raw, indent=2))
    '''goos = sorted(list(all_sections_by_goo.keys()))
    non_match_lookup = { x[0]: x[3].strip() for x in non_matching_names }

    for N in goos:
        found = in_list(ilearn_names,N,'name','sis_user_id')
        new_name = ''
        if not found:
            parts = N.split(' ')
            if len(parts) == 3:
                new_name = f"{parts[0]} {parts[2]}"
                found = in_list(ilearn_names,new_name,'name','sis_user_id')

        try:
            if not found:
                if N in non_match_lookup and non_match_lookup[N]:
                    found = teachers_by_id[non_match_lookup[N]][1]
                elif new_name in non_match_lookup and non_match_lookup[new_name]:
                    found = teachers_by_id[non_match_lookup[new_name]][1]
        except Exception as e:
            print(f"Exception on {N}: {e}")
        print(f"{found} \t {N}")
    print(f"{i} section/teachers analysed")'''

    all_summary = []

    index = []

    DO_UPLOADS = 0
    if input("Do uploads? (y/n) ") == 'y': DO_UPLOADS = 1

    dataout = codecs.open('cache/teacherhistory.txt', 'w', 'utf-8')
    for G, L in all_sections_by_goo.items():
        #dataout.write(f"{G}\n")
        if G:
            N = big_goo_to_name[G]
            summary = summarize_teacher(G, L)
            summary['name'] = N
            summary['goo'] = G
            summary['sections'] = []
            for course in L:
                dataout.write(f"{G},{N},{course['sem']},{course['code']},{course['cred']},{course['type']},{course['site']},{course['days']}\n")
                summary['sections'].append(f"{G},{N},{course['sem']},{course['code']},{course['cred']},{course['type']},{course['site']},{course['days']}")
            all_summary.append(summary)

            index.append([G, N, summary['courses'], summary['training'], summary['most_recent'], summary['dept'], summary['dept2']])

            teacherout = codecs.open(f"cache/faculty/{G}.json", "w", "utf-8")
            teacherout.write(json.dumps(summary, indent=2))
            teacherout.close()
            if DO_UPLOADS:
                print(f"uploading {N}")
                put_file("/home/public/faculty/", "cache/faculty/", f"{G}.json", 0)

    summaryout = codecs.open('cache/teachersummary.json', 'w', 'utf-8')
    summaryout.write(json.dumps(all_summary, indent=2))
    summaryout.close()

    indexout = codecs.open('cache/faculty/index.json', 'w', 'utf-8')
    indexout.write(json.dumps(index, indent=2))
    indexout.close()

    if DO_UPLOADS:
        put_file("/home/public/faculty/", "cache/", "teachersummary.json", 0)
        put_file("/home/public/faculty/", "cache/faculty/", "index.json", 0)


def in_list(li, needle, key, ret_key):
    for L in li:
        if L[key] == needle: return L[ret_key]
    return False

def time_range(a, b):
    if a and b:
        return f"{add_colon_to_24hr_time(a)}-{add_colon_to_24hr_time(b)}"
    return ""

def add_colon_to_24hr_time(the_time):
    if the_time:
        the_time = str(the_time)
        return the_time[0:-2] + ":" + the_time[-2:]
    return ""

def date_without_year(d):
    return f"{d.month}/{d.day}"

def tchr(a, b):
    if a and b:
        return f"{a} {b}"
    return ""


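# Quick checks for the Banner-style integer times:
#   add_colon_to_24hr_time(930)   -> '9:30'
#   add_colon_to_24hr_time(1305)  -> '13:05'
#   time_range(930, 1020)         -> '9:30-10:20'

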
def type_num_to_type_str(typenum):
    typenum = str(typenum)
    if typenum in ["72", "20", "736", "737", "73A"]:
        return "online"
    if typenum in ["2", "4", "45", "46", "47", "04A", "04B"]:
        return "in-person"
    if typenum in ["5", "40"]:
        return "hybrid"
    if typenum in ["71", "73B", "73"]:
        return "online live"
    return typenum


def excel_schedule():

    su_xl = 'SU 2025- 3.24.25.xlsx'
    fa_xl = 'FA 2025- 3.24.25.xlsx'

    infile = fa_xl

    outfile = 'fa25_sched_expanded.json'

    from openpyxl import load_workbook
    wb = load_workbook(f"cache/{infile}")
    print(wb.sheetnames)

    # Select the sheet (or use wb.active)
    ws = wb['Schedule by Division and Dept -']  #wb.active

    # Read and print all rows
    #for row in ws.iter_rows(values_only=True):
    #    print(row)

    # Extract header row
    headers = [cell for cell in next(ws.iter_rows(min_row=1, max_row=1, values_only=True))]

    # Mapping from Excel headers to your desired keys: a plain string names the
    # source column to copy; a lambda computes the field from the whole row tuple
    header_map = {
        "crn": "CRN",
        "code": "Course",
        "sec": "SeqNo",
        #"PtTerm": "cmp",
        "cred": "Units",
        "name": "Title",
        "days": "Days",
        "time": lambda row: time_range(row[headers.index('BegTime')], row[headers.index('EndTime')]),
        "time_start": lambda row: add_colon_to_24hr_time(row[headers.index('BegTime')]),
        "time_end": lambda row: add_colon_to_24hr_time(row[headers.index('EndTime')]),
        "cap": "MaxEnroll",
        "rem": "MaxEnroll",   # note: same source column as 'cap'
        "teacher": lambda row: tchr(row[headers.index('FirstName')], row[headers.index('LastName')]),
        "date": lambda row: f"{date_without_year(row[headers.index('StartDate')])}-{date_without_year(row[headers.index('EndDate')])}",
        "start": lambda row: date_without_year(row[headers.index('StartDate')]),
        "end": lambda row: date_without_year(row[headers.index('EndDate')]),
        "loc": lambda row: "ONLINE" if row[headers.index("Bldg")] == "ONLINE" else f"{row[headers.index('Bldg')]} {row[headers.index('Room')]}",
        # Add fixed fields
        "ztc": lambda row: "",
        "note": "Footnote",
        "type": lambda row: type_num_to_type_str(row[headers.index('SchdTyp')]),
        "site": lambda row: "",
        "doy": lambda row: row[headers.index("StartDate")].timetuple().tm_yday if row[headers.index("StartDate")] else "",
    }

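    # Illustrative only: a hypothetical extra field would follow the same two
    # patterns as above --
    #   "campus": "Cmp",                                               (copy a column through)
    #   "weeks":  lambda row: str(row[headers.index('Weeks')] or "")   (computed from the row)
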
    # Function to convert a row to your dict format
    def convert_row(row):
        data = {}
        for k, v in header_map.items():
            try:
                if callable(v):
                    d2 = v(row)
                    if d2:
                        data[k] = str(d2)
                    else:
                        data[k] = ""
                else:
                    d1 = row[headers.index(v)]
                    if d1:
                        data[k] = str(d1)
                    else:
                        data[k] = ""
            except Exception as e:
                print(f"Exception for key [{k}] value [{v}] on ROW: {row}\n{e}")
                print(json.dumps(data, indent=2))
        return data

    # Process all data rows
    data_rows = list(ws.iter_rows(min_row=2, values_only=True))
    converted = [convert_row(row) for row in data_rows]

    print(json.dumps(converted, indent=2))
    with codecs.open(f'cache/schedules/{outfile}', 'w', 'utf-8') as ofi:
        ofi.write(json.dumps(converted, indent=2))


# wanted: a class's teacher history: everyone who teaches it.


if __name__ == "__main__":

    print('')
    options = { 1: ['Interactive schedule search', interactive_schedule_search],
                2: ['test areas gp and deans', campus_dept_hierarchy],
                3: ['download_fresh_schedules', download_fresh_schedules],
                4: ['teachers_history', teachers_history],
                5: ['parse in progress schedule', excel_schedule],
              }
    '''3: ['Fetch rosters on schedule',fetch_current_rosters_auto] ,
    4: ['Compute how registration is filling up classes', schedule_filling] ,
    5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
    6: ['Canvas data: interactive sync', interactive ],
    7: ['Canvas data: automated sync', sync_non_interactive ],
    8: ['Get canvas data 2024 style', canvas_data_2024_run ],
    9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
    16: ['Scrape schedule from ssb', scrape_schedule_multi ],
    14: ['Generate latestart schedule', list_latestarts ],
    15: ['Test ssb calls with python', scrape_schedule_py ],
    10: ['schedule to db', scrape_for_db ],
    11: ['clean argos draft schedule file', argos_data_from_cvc],
    12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
    13: ['Parse deanza schedule', dza_sched ],'''

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the function in the options dict
    options[int(resp)][1]()