# This Python file uses the following encoding: windows-1252

# Scrape the class schedule out of Self-Service Banner, log how sections fill
# during registration, and publish the results to the public web site.

import codecs, csv, datetime, json, os, pathlib, re, time, traceback
from datetime import datetime as dt
from io import StringIO
from time import strptime

import boto3
import pandas as pd
import pysftp
from botocore.exceptions import ClientError
from bs4 import BeautifulSoup as bs
from dateutil import parser
from deepdiff import DeepDiff
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select

from canvas_secrets import access_key, access_secret
from canvas_secrets import FTP_SITE, FTP_USER, FTP_PW


# TODO: move these lookup tables out of this module.

# Department prefix -> interest-area group.
gp = {
    'ACCT': 'info',  'AE': 'skill',   'AH': 'well',    'AJ': 'skill',
    'AMT': 'skill',  'ANTH': 'soc',   'APE': 'skill',  'ART': 'art',
    'ASTR': 'stem',  'ATH': 'well',   'BIO': 'stem',   'BOT': 'info',
    'BUS': 'info',   'CD': 'skill',   'CHEM': 'stem',  'CMGT': 'skill',
    'CMUN': 'comm',  'COS': 'skill',  'CSIS': 'stem',  'CWE': 'skill',
    'DM': 'info',    'ECOL': 'stem',  'ECON': 'info',  'ENGL': 'soc',
    'ENGR': 'stem',  'ENVS': 'stem',  'ESL': 'comm',   'ETHN': 'comm',
    'FRNH': 'comm',  'GEOG': 'stem',  'GEOL': 'stem',  'GUID': 'soc',
    'HE': 'well',    'HIST': 'soc',   'HUM': 'soc',    'HVAC': 'skill',
    'JFT': 'skill',  'JLE': 'skill',  'JOUR': 'comm',  'JPN': 'comm',
    'KIN': 'well',   'LIB': 'comm',   'LIFE': 'well',  'MATH': 'stem',
    'MCTV': 'art',   'MUS': 'art',    'PHIL': 'soc',   'PHYS': 'stem',
    'POLS': 'soc',   'PSCI': 'stem',  'PSYC': 'soc',   'RE': 'skill',
    'SJS': 'soc',    'SOC': 'soc',    'SPAN': 'comm',  'THEA': 'art',
    'WELD': 'skill', 'WTRM': 'skill', 'MGMT': 'skill', 'MKTG': 'skill',
    'HTM': 'skill',
}

# Department prefix -> dean initials (see dean_names below).
dean = {
    'AH': 'et',   'HE': 'et',   'ATH': 'et',  'KIN': 'et',  'LIFE': 'et',
    'AE': 'ss',   'APE': 'ss',  'ACCT': 'ss', 'AJ': 'ss',   'AMT': 'ss',
    'HVAC': 'ss', 'JFT': 'ss',  'JLE': 'ss',  'RE': 'ss',   'WTRM': 'ss',
    'WELD': 'ss', 'ANTH': 'nl', 'ART': 'nl',  'ASTR': 'jn', 'BIO': 'jn',
    'BOT': 'ss',  'BUS': 'ss',  'CD': 'ss',   'CHEM': 'jn', 'CMGT': 'ss',
    'CMUN': 'nl', 'COS': 'ss',  'CSIS': 'ss', 'CWE': 'ss',  'DM': 'ss',
    'ECOL': 'jn', 'ECON': 'ss', 'ENGL': 'nl', 'ENGR': 'jn', 'ENVS': 'jn',
    'ESL': 'ss',  'ETHN': 'nl', 'FRNH': 'nl', 'GEOG': 'jn', 'GEOL': 'jn',
    'GUID': 'de', 'HIST': 'nl', 'HUM': 'nl',  'JOUR': 'nl', 'JPN': 'nl',
    'LIB': 'jn',  'MATH': 'jn', 'MCTV': 'nl', 'MGMT': 'ss', 'MKTG': 'ss',
    'HTM': 'ss',  'MUS': 'nl',  'PHIL': 'nl', 'PHYS': 'jn', 'POLS': 'nl',
    'PSCI': 'jn', 'PSYC': 'nl', 'PSYCH': 'nl', 'SJS': 'nl', 'SOC': 'nl',
    'SPAN': 'nl', 'THEA': 'nl',
}

# Dean initials -> full name.
dean_names = {
    'et': 'Enna Trevathan',
    'ss': 'Susan Sweeney',
    'nl': 'Noah Lystrup',
    'jn': 'Jennifer Nari',
    'de': 'Diego Espinoza',
}
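# Example lookups (illustrative): gp['MATH'] -> 'stem';
# dean_names[dean['MATH']] -> 'Jennifer Nari'.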


def get_secret():
    """Fetch the portal login from AWS Secrets Manager."""
    secret_name = "gav/goo/cred"
    region_name = "us-west-1"

    # Create a Secrets Manager client
    session = boto3.session.Session(
        aws_access_key_id=access_key,
        aws_secret_access_key=access_secret
    )
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError:
        # For a list of exceptions thrown, see
        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
        raise

    secret = json.loads(get_secret_value_response['SecretString'])
    return (secret['user'], secret['password'])
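# The secret is expected to be a JSON object holding the portal login,
# shaped like {"user": "...", "password": "..."} (keys per the code above).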

DEBUG = 0
# Portal credentials, fetched once at import time.
GOO, GOO_PIN = get_secret()


def d(s, end=''):
    # Debug print: only emits output when DEBUG is set.
    if end and DEBUG: print(s, end=end)
    elif DEBUG: print(s)


def clean_funny(s):
    # Blank Banner cells arrive as the Unicode replacement character (U+FFFD);
    # treat those as empty.
    if s == '\ufffd': return ''
    return s


def clean_funny2(s):
    # Like clean_funny, but also treats a lone non-breaking space as empty.
    if s == '\xa0': return ''
    if s == '\ufffd': return ''
    return s


def row_has_data(r):  # helper
    # Keep rows that have several data cells (a section) or carry a "Note:".
    if r.find_all('th'):
        return False
    if len(r.find_all('td')) > 2:
        return True
    if re.search(r'Note:', r.get_text()):
        return True
    return False


def time_to_partofday(t):
    # todo: account for multiple sites/rows
    # Classify a meeting time like '11:20 am-12:10 pm' by when it ends.
    mor = strptime('12:00 PM', '%I:%M %p')
    mid = strptime('2:00 PM', '%I:%M %p')
    aft = strptime('6:00 PM', '%I:%M %p')
    if t == 'TBA':
        return 'TBA'
    t = t.upper()
    parts = t.split('-')
    try:
        begin = strptime(parts[0], '%I:%M %p')  # parsed only to validate the start time
        end = strptime(parts[1], '%I:%M %p')
        if end > aft:
            return "Evening"
        if end > mid:
            return "Afternoon"
        if end > mor:
            return "Midday"
        return "Morning"
    except Exception:
        # couldn't parse this time string
        return ""
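# Illustrative: time_to_partofday('11:20 am-12:10 pm') -> 'Midday', because
# the 12:10 PM end time falls between the 12:00 PM and 2:00 PM boundaries.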


# Deduce a 'site' field, based on room name and known offsite locations
def room_to_site(room, verbose=0):
    # todo: account for multiple sites/rows
    # todo: better way to store these offsite labels
    othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT'.split(',')
    # is it gilroy, mh, hol, other, online or hybrid?
    site = 'Gilroy'
    if room in othersites:
        site = "Other"
    if room == 'TBA':
        site = 'TBA'
    if room == 'AV':
        site = 'San Martin Airport'
    if re.search('MHG', room):
        site = 'Morgan Hill'
    if re.search('HOL', room):
        site = 'Hollister'
    if re.search('COY', room):
        site = 'Coyote Valley'
    if re.search('OFFSTE', room):
        site = 'Other'
    if re.search('ONLINE', room):
        site = 'Online'
    if verbose: print(room, '\t', end=' ')
    return site
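# Illustrative: room_to_site('MHG 118') -> 'Morgan Hill' (matches 'MHG'), and
# room_to_site('ONLINE') -> 'Online'; an unrecognized room defaults to 'Gilroy'.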


# Take Banner's schedule HTML and return it as CSV text.
def ssb_to_csv(src):
    output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
    b = bs(src, 'html.parser')
    tab = b.find(class_="datadisplaytable")
    if not tab:
        print("hmm... didn't find a 'datadisplaytable' in this html.")
        return 0
    rows = tab.find_all('tr')
    drows = list(filter(row_has_data, rows))
    for dd in drows:
        output += row_text(dd)
    return output
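# Each qualifying <tr> becomes one CSV line matching the header above; rows
# that only carry a "Note:" are emitted as mostly-empty lines (see row_text).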


def row_text(r):  # helper
    # Flatten one <tr> into a CSV line.
    d("Row Txt Fxn gets: ")
    arr = []
    for t in r.find_all('td'):
        # An image at the start of a cell is recorded as a literal "1" column.
        if t.contents and t.contents[0].name == 'img':
            arr.append("1")
            d("img")
        r_text = t.get_text()
        arr.append(r_text)
        if 'colspan' in t.attrs and t['colspan'] == '2':
            d('[colspan2]')
            arr.append('')
        d("\t" + r_text, end=" ")
    d('')

    # A "Note:" row has a single cell; emit it as a line that is blank except
    # for the final quoted note field.
    if len(arr) == 1 and re.search(r'Note:', arr[0]):
        note_line = clean_funny(arr[0])
        note_line = re.sub(r'\n', ' ', note_line)
        note_line = re.sub(r'"', '', note_line)
        return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
    del arr[0]
    arr[1] = clean_funny(arr[1])
    arr[2] = clean_funny(arr[2])
    if arr[1]: arr[1] = arr[1] + " " + arr[2]   # join subject and course number
    del arr[2]
    arr = [re.sub('\xa0', '', a) for a in arr]  # drop non-breaking spaces
    arr = [re.sub(',', '. ', a) for a in arr]   # commas would break the CSV
    arr = [re.sub(r'\(P\)', '', a) for a in arr]
    arr = [a.strip() for a in arr]
    r = ','.join(arr)
    r = re.sub('\n', '', r)
    r = re.sub('add to worksheet', '', r)
    d("Row Txt Fxn returns: " + r + "\n\n")
    return r + '\n'


# Take the CSV text lines and condense them to one dict per section.
def to_section_list(input_text, verbose=0):
    this_course = ''
    # todo: no output files
    all_courses = []

    try:
        f = StringIO(input_text)
    except TypeError:
        # ssb_to_csv returns 0 when it can't find the schedule table
        print("ERROR with this input_text:")
        print(input_text)
        return []
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    for row in reader:
        rd = dict(zip(headers, row))
        # clean funny unicode chars in blank entries
        r = {k: clean_funny2(v) for k, v in rd.items()}
        if verbose: print("Cleaned: " + str(r))

        if 'time' in r:
            if r['time'] == 'TBA': r['time'] = ''
            if r['time']: r['partofday'] = time_to_partofday(r['time'])

        r['type'] = ''

        if 'loc' in r:
            if r['loc'] == 'ONLINE': r['type'] = 'online'
            if r['loc'] == 'ONLINE' and r['time']: r['type'] = 'online live'
            if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
            if r['loc']: r['site'] = room_to_site(r['loc'], verbose)

        if 'code' in r:
            if re.search(r'ONLINE\sLIVE', r['code']):
                r['type'] = 'online live'
            elif re.search(r'ONLINE', r['code']):
                r['type'] = 'online'

        # A row with a crn starts a new section; a blank crn is a
        # continuation (an additional meeting) of the previous one.
        if r['crn']:
            if verbose: print(" it's a new section.")
            if this_course:
                if not this_course['extra']: this_course.pop('extra', None)
                all_courses.append(this_course)
            this_course = r
            this_course['extra'] = []
        else:
            if verbose: print(" additional meeting: " + str(r))
            for k, v in list(r.items()):
                if not v: r.pop(k, None)
            # TODO: if the extra line is a different type, mark the section hybrid?
            this_course['extra'].append(r)

    # Don't drop the last section in the file.
    if this_course:
        if not this_course['extra']: this_course.pop('extra', None)
        all_courses.append(this_course)
    return all_courses
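# Illustrative shape of one section dict (keys come from the CSV header plus
# the derived fields added above; values here are made up):
#   {'crn': '40123', 'code': 'MATH 1A', 'time': '11:20 am-12:10 pm',
#    'site': 'Gilroy', 'type': '', 'partofday': 'Midday', 'extra': []}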


# Schedule / course-filling history.
# csv columns: timestamp, crn, code, teacher, cap, act, wl_cap, wl_act
# Log the enrollment count of every section during registration. Relies on
# the global short_sem set in scrape_schedule_multi().
def log_section_filling(current_sched_list):
    rows_j = 'crn code teacher cap act wl_cap wl_act'.split(' ')
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    csv_fn = 'cache/reg_history_' + short_sem + '.csv'
    with codecs.open(csv_fn, 'a', 'utf-8') as f:
        writer = csv.writer(f)
        for S in current_sched_list:
            items = [now]
            items.extend(S[X] for X in rows_j)
            writer.writerow(items)


# Same as above, but compressed: just the 'act' count, one column per run.
def log_section_filling2(current_sched_list):
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H')

    todays_data = {int(S['crn']): S['act'] for S in current_sched_list}
    todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
    todays_df = todays_df.rename_axis('crn')
    todays_df.to_csv('cache/reg_today_new.csv', index=True)

    try:
        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
        print(myframe)
    except FileNotFoundError:
        print("Creating new data file for this semester.")
        fff = open('cache/reg_data_' + short_sem + '.csv', 'w')
        fff.write('crn\n')
        fff.close()
        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')

    new_df = myframe.join(todays_df, on='crn', how='outer')
    new_df = new_df.rename_axis('crn')
    print(new_df)

    reg_data_filename = 'reg_data_' + short_sem + '.csv'
    new_df.to_csv('cache/' + reg_data_filename, index=False)
    put_file('/home/public/schedule/', 'cache/', reg_data_filename, 0)
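# The reg_data file is wide-format: a 'crn' column plus one column of 'act'
# counts per run, e.g. (values illustrative):
#   crn,2024-08-01T09,2024-08-02T09
#   40123,12,15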


# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term):
    show_summary = 1

    the_year = '20' + term[2:4]
    print("year: ", the_year, " semester: ", term)

    term_in = "cache/%s_sched.json" % term
    term_out = "cache/%s_latestarts.txt" % term
    expanded_out = "%s_sched_expanded.json" % term
    print("Writing output to " + term_out)
    infile = codecs.open(term_in, "r", "utf-8")
    outfile = codecs.open(term_out, "w", "utf-8")
    exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
    expanded = []
    sched = json.loads(infile.read())
    by_date = {}

    if show_summary: print("course \t loc \t type \t time")

    for C in sched:
        if (not C['type']) and C['loc'] != 'ONLINE':
            C['type'] = 'in-person'

        if show_summary: print("%s \t %s \t %s \t %s" % (C['code'], C['loc'], C['type'], C['time']))

        # An in-person section whose extra meeting is online is a hybrid.
        if 'extra' in C:
            if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
                C['type'] = 'hybrid'

        # Split a time range like '11:20 am-12:10 pm' into 24-hour start/end.
        times = C['time'].split("-")
        if len(times) > 1:
            time_start = times[0]
            time_end = times[1]
            try:
                startt = time.strptime(time_start, "%I:%M %p")
                endt = time.strptime(time_end, "%I:%M %p")
                min_start = "00" if startt.tm_min == 0 else str(startt.tm_min)
                min_end = "00" if endt.tm_min == 0 else str(endt.tm_min)
                C['time_start'] = "%i:%s" % (startt.tm_hour, min_start)
                C['time_end'] = "%i:%s" % (endt.tm_hour, min_end)
            except Exception as e:
                print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
        else:
            C['time_start'] = ''
            C['time_end'] = ''

        if re.search('TBA', C['date']):
            C['start'] = ''
            C['end'] = ''
            C['doy'] = ''
            expanded.append(C)
            continue

        # Dates arrive like '08/26-12/14'; attach the year and parse.
        parts = C['date'].split("-")
        start = parts[0] + "/" + the_year
        end = parts[1] + "/" + the_year
        try:
            startd = parser.parse(start)
            endd = parser.parse(end)
        except Exception as e:
            print(e, "\n-- problem parsing ", start, " or ", end)
            continue
        C['start'] = "%i-%i" % (startd.month, startd.day)
        C['end'] = "%i-%i" % (endd.month, endd.day)
        C['doy'] = startd.timetuple().tm_yday
        expanded.append(C)
        if startd not in by_date:
            by_date[startd] = []
        by_date[startd].append(C)

    exoutfile.write(json.dumps(expanded, indent=2))
    exoutfile.close()
    put_file('/home/public/schedule/', 'cache/', expanded_out, 0)

    for X in sorted(by_date.keys()):
        # Skip the big main-start-date bucket; only the smaller, late-starting
        # groups belong in the latestarts list.
        if len(by_date[X]) < 200:
            prettydate = X.strftime("%A, %B %d")
            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
            for Y in by_date[X]:
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\n")
    outfile.close()
    put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
    return expanded
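# The latestarts file groups sections by start date, e.g. (illustrative):
#   Monday, September 16: 2 courses
#       MATH 1A 40123   A. Instructor   in-person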


# Use Firefox to log in to SSB and fetch the full schedule. Only works where
# selenium and Firefox/geckodriver are installed. Relies on the SEMESTER and
# filename globals set in scrape_schedule_multi().
def scrape_schedule():
    url = "https://lum-prod.ec.gavilan.edu/"
    text = ''

    try:
        driver = webdriver.Firefox()
        driver.get(url)
        driver.implicitly_wait(15)

        # Log in to the portal.
        driver.find_element("id", "usernameUserInput").clear()
        driver.find_element("id", "usernameUserInput").send_keys(GOO)
        driver.find_element("name", "password").send_keys(GOO_PIN)
        driver.find_element("xpath", "/html/body/div[1]/div/div/div/form/div[3]/div/button").click()
        driver.implicitly_wait(10)

        # Click the student tab.
        print(driver.title)
        driver.find_element("xpath", "/html/body/div[2]/nav/div/div[1]/ul/li[3]/a/span").click()
        driver.implicitly_wait(10)

        # Set up an explicit wait for later.
        wait = WebDriverWait(driver, 10)
        original_window = driver.current_window_handle

        # Click the SSB button. Opens a new tab.
        driver.implicitly_wait(10)
        driver.find_element("xpath", "/html/body/div[2]/div/div[3]/div/div[1]/div/div[1]/section/div/div/div/div[1]/div/div/div/h5/a").click()
        driver.implicitly_wait(20)

        # Wait for the new window or tab.
        wait.until(EC.number_of_windows_to_be(2))

        # Loop through until we find the new window handle.
        for window_handle in driver.window_handles:
            if window_handle != original_window:
                driver.switch_to.window(window_handle)
                break

        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Students").click()
        driver.implicitly_wait(20)
        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Registration").click()
        driver.implicitly_wait(10)
        print(driver.title)

        driver.find_element(By.LINK_TEXT, "Search for Classes").click()
        driver.implicitly_wait(15)
        print(driver.title)

        # Pick the term and submit.
        dd = Select(driver.find_element("name", "p_term"))
        dd.select_by_visible_text(SEMESTER)
        driver.find_element("xpath", "/html/body/div/div[4]/form").submit()
        driver.implicitly_wait(15)
        print(driver.title)

        driver.find_element("xpath", "/html/body/div/div[4]/form/input[18]").click()
        driver.implicitly_wait(10)
        print(driver.title)

        driver.find_element("name", "SUB_BTN").click()
        driver.implicitly_wait(40)
        time.sleep(15)  # the full schedule page takes a while to render
        driver.implicitly_wait(40)
        print(driver.title)
        text = driver.page_source
        driver.quit()

    except Exception as e:
        print("Got an exception: ", e)
        traceback.print_exc()
        return
    codecs.open('cache/' + filename_html, 'w', 'utf-8').write(text)

    as_list = ssb_to_csv(text)
    as_dict = to_section_list(as_list)
    jj = json.dumps(as_dict, indent=2)

    # Diff against the previous run and log enrollment history.
    try:
        print("Opening " + 'cache/' + filename)
        ps = codecs.open('cache/' + filename, 'r', 'utf-8')
        prev_sched = json.loads(ps.read())
        ps.close()
        print("ok")

        if 1:  # sometimes I want to re-run this without affecting the logs.
            log_section_filling(as_dict)
            log_section_filling2(as_dict)

        dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
        print("diff done")
        pretty_json = json.dumps(json.loads(dd.to_json()), indent=2)
        codecs.open('cache/%s_sched_diff.json' % short_sem, 'w', 'utf-8').write(pretty_json)

    except Exception as e:
        print(e)
        print("Can't do diff?")

    # Next, rename the prev sched_xxYY.json data file to have its date,
    # make this new one, and then upload it to the website.
    # Maybe even count the entries and do a little sanity checking.
    try:
        prev_stat = pathlib.Path('cache/' + filename).stat()
        mtime = dt.fromtimestamp(prev_stat.st_mtime)
        print(mtime)
    except OSError:
        print("Couldn't stat the previous schedule file.")

    codecs.open('cache/' + filename, 'w', 'utf-8').write(jj)
    put_file('/home/public/schedule/', 'cache/', filename, 0)  # /gavilan.edu/_files/php/

    return as_dict


def scrape_schedule_multi():
    # Term settings; the other functions pick these up as globals.
    global SEMESTER, short_sem, semester_begin, filename, filename_html, sem_code

    SEMESTER = 'Fall 2024'
    sem_code = '202470'
    short_sem = 'fa24'
    semester_begin = strptime('08/26', '%m/%d')
    filename = 'fa24_sched.json'
    filename_html = 'fa24_sched.html'

    as_dict = scrape_schedule()

    expanded = list_latestarts(short_sem)
    fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")

    with codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)

        for S in expanded:
            # 'code' is e.g. 'MATH 1A': dept prefix, then course number.
            parts = S['code'].split(' ')
            S['dept'] = parts[0]
            S['num'] = parts[1]
            S['gp'] = gp[parts[0]]      # a KeyError here means a new dept
            S['dean'] = dean[parts[0]]  # prefix is missing from the tables above
            S['sem'] = short_sem
            if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
            csvwriter.writerow([S[x] for x in fields])

    put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)


################
################ SENDING DATA AWAY
################


# Upload a file from the local cache to the public web site over SFTP.
def put_file(remotepath, localpath, localfile, prompt=1):
    show_all = 0
    stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None  # NOTE: skips host-key verification entirely
    with pysftp.Connection(FTP_SITE, username=FTP_USER, password=FTP_PW, cnopts=cnopts) as sftp:
        # todo: these paths
        sftp.chdir(remotepath)
        files = sftp.listdir()
        if show_all: print(stamp + "\tI see these files on remote: ", files, "\n")

        localf = os.listdir(localpath)
        if show_all: print("I see these local: ", localf)

        if prompt:
            input('ready to upload')
        sftp.put(localpath + localfile, localfile, preserve_mtime=True)
    print("Uploaded %s" % localfile)


if __name__ == '__main__':
    scrape_schedule_multi()