diff --git a/aws.py b/aws.py
new file mode 100644
index 0000000..970fdd1
--- /dev/null
+++ b/aws.py
@@ -0,0 +1,756 @@
+
+# This Python file uses the following encoding: windows-1252
+#
+
+import datetime, pysftp, codecs, re, time, json, traceback, csv, os
+import pandas as pd
+import boto3
+from botocore.exceptions import ClientError
+from deepdiff import DeepDiff
+from io import StringIO
+from time import strptime
+from dateutil import parser
+from bs4 import BeautifulSoup as bs
+from datetime import datetime as dt
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+
+from canvas_secrets import access_key, access_secret
+from canvas_secrets import FTP_SITE, FTP_USER, FTP_PW
+
+
+# TODO move these mapping tables out of the code (e.g. into a CSV)
+
+# Department -> guided-pathways area
+gp = {
+    'ACCT': 'info', 'AE': 'skill', 'AH': 'well', 'AJ': 'skill', 'AMT': 'skill',
+    'ANTH': 'soc', 'APE': 'skill', 'ART': 'art', 'ASTR': 'stem', 'ATH': 'well',
+    'BIO': 'stem', 'BOT': 'info', 'BUS': 'info', 'CD': 'skill', 'CHEM': 'stem',
+    'CMGT': 'skill', 'CMUN': 'comm', 'COS': 'skill', 'CSIS': 'stem', 'CWE': 'skill',
+    'DM': 'info', 'ECOL': 'stem', 'ECON': 'info', 'ENGL': 'soc', 'ENGR': 'stem',
+    'ENVS': 'stem', 'ESL': 'comm', 'ETHN': 'comm', 'FRNH': 'comm', 'GEOG': 'stem',
+    'GEOL': 'stem', 'GUID': 'soc', 'HE': 'well', 'HIST': 'soc', 'HUM': 'soc',
+    'HVAC': 'skill', 'JFT': 'skill', 'JLE': 'skill', 'JOUR': 'comm', 'JPN': 'comm',
+    'KIN': 'well', 'LIB': 'comm', 'LIFE': 'well', 'MATH': 'stem', 'MCTV': 'art',
+    'MUS': 'art', 'PHIL': 'soc', 'PHYS': 'stem', 'POLS': 'soc', 'PSCI': 'stem',
+    'PSYC': 'soc', 'PSYCH': 'soc', 'RE': 'skill', 'SJS': 'soc', 'SOC': 'soc',
+    'SPAN': 'comm', 'THEA': 'art', 'WELD': 'skill', 'WTRM': 'skill',
+    'MGMT': 'skill', 'MKTG': 'skill', 'HTM': 'skill',
+}
+
+# Department -> dean (initials; full names in dean_names below)
+dean = {
+    'AH': 'et', 'HE': 'et', 'ATH': 'et', 'KIN': 'et', 'LIFE': 'et',
+    'AE': 'ss', 'APE': 'ss', 'ACCT': 'ss', 'AJ': 'ss', 'AMT': 'ss',
+    'HVAC': 'ss', 'JFT': 'ss', 'JLE': 'ss', 'RE': 'ss', 'WTRM': 'ss',
+    'WELD': 'ss', 'ANTH': 'nl', 'ART': 'nl', 'ASTR': 'jn', 'BIO': 'jn',
+    'BOT': 'ss', 'BUS': 'ss', 'CD': 'ss', 'CHEM': 'jn', 'CMGT': 'ss',
+    'CMUN': 'nl', 'COS': 'ss', 'CSIS': 'ss', 'CWE': 'ss', 'DM': 'ss',
+    'ECOL': 'jn', 'ECON': 'ss', 'ENGL': 'nl', 'ENGR': 'jn', 'ENVS': 'jn',
+    'ESL': 'ss', 'ETHN': 'nl', 'FRNH': 'nl', 'GEOG': 'jn', 'GEOL': 'jn',
+    'GUID': 'de', 'HIST': 'nl', 'HUM': 'nl', 'JOUR': 'nl', 'JPN': 'nl',
+    'LIB': 'jn', 'MATH': 'jn', 'MCTV': 'nl', 'MGMT': 'ss', 'MKTG': 'ss',
+    'HTM': 'ss', 'MUS': 'nl', 'PHIL': 'nl', 'PHYS': 'jn', 'POLS': 'nl',
+    'PSCI': 'jn', 'PSYC': 'nl', 'PSYCH': 'nl', 'SJS': 'nl', 'SOC': 'nl',
+    'SPAN': 'nl', 'THEA': 'nl',
+}
+
+dean_names = {
+    'et': 'Enna Trevathan',
+    'ss': 'Susan Sweeney',
+    'nl': 'Noah Lystrup',
+    'jn': 'Jennifer Nari',
+    'de': 'Diego Espinoza',
+}
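+
+# Sketch for the TODO above: load the department mappings from a CSV
+# instead of hard-coding them. Assumes a hypothetical cache/dept_map.csv
+# with header "dept,gp,dean"; left commented out since that file does not
+# exist yet.
+#
+# def load_dept_map(path='cache/dept_map.csv'):
+#     gp_map, dean_map = {}, {}
+#     with open(path, newline='') as f:
+#         for row in csv.DictReader(f):
+#             gp_map[row['dept']] = row['gp']
+#             dean_map[row['dept']] = row['dean']
+#     return gp_map, dean_map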
+
+
+def get_secret():
+
+    secret_name = "gav/goo/cred"
+    region_name = "us-west-1"
+
+    # Create a Secrets Manager client
+    session = boto3.session.Session(
+        aws_access_key_id = access_key,
+        aws_secret_access_key = access_secret
+    )
+    client = session.client(
+        service_name='secretsmanager',
+        region_name=region_name
+    )
+
+    try:
+        get_secret_value_response = client.get_secret_value(
+            SecretId=secret_name
+        )
+    except ClientError:
+        # For a list of exceptions thrown, see
+        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
+        raise
+
+    secret = json.loads(get_secret_value_response['SecretString'])
+    return (secret['user'], secret['password'])
+
+DEBUG = 0
+GOO, GOO_PIN = get_secret()
+
+
+def d(s, end=''):
+    global DEBUG
+    if end and DEBUG: print(s, end=end)
+    elif DEBUG: print(s)
+
+# Blank out cells that only hold a non-breaking space
+def clean_funny(s):
+    if s and s == '\xa0': return ''
+    return s
+
+def clean_funny2(s):
+    if s and s == '\xa0': return ''
+    if s and s.strip() == '': return ''
+    return s
+
+
+def row_has_data(r): # helper
+    if r.find_all('th'):
+        return False
+    if len(r.find_all('td')) > 2:
+        return True
+    if re.search(r'Note:', r.get_text()):
+        return True
+    return False
+
+def time_to_partofday(t):
+    #todo: account for multiple sites/rows
+    # e.g. "11:20 am-12:10 pm"
+    mor = strptime('12:00 PM', '%I:%M %p')
+    mid = strptime( '2:00 PM', '%I:%M %p')
+    aft = strptime( '6:00 PM', '%I:%M %p')
+    if t == 'TBA':
+        return 'TBA'
+    t = t.upper()
+    parts = t.split('-')
+    try:
+        begin = strptime(parts[0], '%I:%M %p')
+        end = strptime(parts[1], '%I:%M %p')
+        if end > aft:
+            return "Evening"
+        if end > mid:
+            return "Afternoon"
+        if end > mor:
+            return "Midday"
+        return "Morning"
+    except Exception:
+        # couldn't parse the time range
+        return ""
+
+# Deduce a 'site' field, based on room name and known offsite locations
+def room_to_site(room, verbose=0):
+    #todo: account for multiple sites/rows
+    #todo: better way to store these offsite labels
+    othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT'.split(',')
+    # is it gilroy, mh, hol, other, online or hybrid?
+    site = 'Gilroy'
+    if room in othersites:
+        site = "Other"
+    if room == 'TBA':
+        site = 'TBA'
+    if room == 'AV':
+        site = 'San Martin Airport'
+    if re.search('MHG', room):
+        site = 'Morgan Hill'
+    if re.search('HOL', room):
+        site = 'Hollister'
+    if re.search('COY', room):
+        site = 'Coyote Valley'
+    if re.search('OFFSTE', room):
+        site = 'Other'
+    if re.search('ONLINE', room):
+        site = 'Online'
+    if verbose: print(room, '\t', end=' ')
+    return site
+
+
+# Take banner's html and turn it into CSV text
+def ssb_to_csv(src):
+    output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
+    b = bs(src, 'html.parser')
+    tab = b.find(class_="datadisplaytable")
+    if not tab:
+        print("hmm... didn't find a 'datadisplaytable' in this html")
+        #print(src)
+        return ''
+    rows = tab.find_all('tr')
+    drows = list(filter(row_has_data, rows))
+    for dd in drows:
+        output += row_text(dd)
+    return output
+
+def row_text(r): # helper
+    d("Row Txt Fxn gets: ")
+    arr = []
+    for t in r.find_all('td'):
+        if t.contents and len(t.contents) and t.contents[0].name == 'img':
+            arr.append("1")
+            d("img")
+        r_text = t.get_text()
+        arr.append(r_text)
+        if 'colspan' in t.attrs and t['colspan'] == '2':
+            d('[colspan2]')
+            arr.append('')
+        d("\t" + r_text, end=" ")
+    d('')
+
+    if len(arr) == 1 and re.search(r'Note:', arr[0]):
+        note_line = clean_funny(arr[0])
+        note_line = re.sub(r'\n', ' ', note_line)
+        note_line = re.sub(r'"', '', note_line)
+        return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
+    del arr[0]
+    arr[1] = clean_funny(arr[1])
+    arr[2] = clean_funny(arr[2])
+    if arr[1]: arr[1] = arr[1] + " " + arr[2]
+    del arr[2]
+    arr = [re.sub('\xa0', '', a) for a in arr]   # strip non-breaking spaces
+    arr = [re.sub(',', '. ', a) for a in arr]
+    arr = [re.sub(r'\(P\)', '', a) for a in arr]
+    arr = [a.strip() for a in arr]
+    r = ','.join(arr) + '\n'
+    r = re.sub('\n', '', r)
+    r = re.sub('add to worksheet', '', r)
+    d("Row Txt Fxn returns: " + r + "\n\n")
+
+    return r + '\n'
+
+
+# take text lines and condense them to one dict per section
+def to_section_list(input_text, verbose=0):
+    this_course = ''
+    all_courses = []
+    if not input_text:
+        return all_courses
+
+    f = StringIO(input_text)
+    reader = csv.reader(f, delimiter=',')
+    headers = next(reader)
+    for raw in reader:
+        rec = dict(zip(headers, raw))
+        # clean funny unicode char in blank entries
+        r = {k: clean_funny2(v) for k, v in rec.items()}
+        if verbose: print("Cleaned: " + str(r))
+
+        if 'time' in r:
+            if r['time'] == 'TBA': r['time'] = ''
+            if r['time']: r['partofday'] = time_to_partofday(r['time'])
+
+        r['type'] = ''
+
+        if 'loc' in r:
+            if r['loc'] == 'ONLINE': r['type'] = 'online'
+            if r['loc'] == 'ONLINE' and r.get('time'): r['type'] = 'online live'
+            if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
+            if r['loc']: r['site'] = room_to_site(r['loc'], verbose)
+
+        if 'code' in r:
+            if re.search(r'ONLINE\sLIVE', r['code']):
+                r['type'] = 'online live'
+            elif re.search(r'ONLINE', r['code']):
+                r['type'] = 'online'
+
+        # does it have a crn? then it begins a new section
+        if r.get('crn'):
+            if verbose: print("  it's a new section.")
+            if this_course:
+                if not this_course['extra']: this_course.pop('extra', None)
+                all_courses.append(this_course)
+            this_course = r
+            this_course['extra'] = []
+        else:
+            # is a continuation line
+            if verbose: print("  additional meeting: " + str(r))
+            for k, v in list(r.items()):
+                if not v: r.pop(k, None)
+            # TODO: if extra line is different type?
+            #if this_course['type']=='online' and r['type'] != 'online': this_course['type'] = 'hybrid'
+            #elif this_course['type']!='online' and r['type'] == 'online': this_course['type'] = 'hybrid'
+            this_course['extra'].append(r)
+
+    # don't drop the final section in the file
+    if this_course:
+        if not this_course['extra']: this_course.pop('extra', None)
+        all_courses.append(this_course)
+    return all_courses
+
+
+# Schedule / course filling history.
+# csv headers: timestamp, crn, code, teacher, cap, act, wl_cap, wl_act
+# Log the history of enrollments per course during registration.
+def log_section_filling(current_sched_list):
+    header = 'timestamp crn code teacher cap act wl_cap wl_act'.split(' ')
+    fields = 'crn code teacher cap act wl_cap wl_act'.split(' ')
+    now = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
+    csv_fn = 'cache/reg_history_' + short_sem + '.csv'
+    is_new = not os.path.exists(csv_fn)
+    with codecs.open(csv_fn, 'a', 'utf-8') as f:
+        writer = csv.writer(f)
+        if is_new: writer.writerow(header)
+        for S in current_sched_list:
+            writer.writerow([now] + [S[X] for X in fields])
+
+# Same as above, but compressed: one row per crn, one 'act' column per run
+def log_section_filling2(current_sched_list):
+    now = datetime.datetime.now().strftime('%Y-%m-%dT%H')
+
+    todays_data = {int(S['crn']): S['act'] for S in current_sched_list}
+
+    todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
+    todays_df = todays_df.rename_axis('crn')
+    todays_df.to_csv('cache/reg_today_new.csv', index=True)
+
+    try:
+        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
+        print(myframe)
+    except FileNotFoundError:
+        fff = open('cache/reg_data_' + short_sem + '.csv', 'w')
+        fff.write('crn\n')
+        fff.close()
+        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
+        print("Creating new data file for this semester.")
+
+    new_df = myframe.join(todays_df, on='crn', how='outer')
+    new_df = new_df.rename_axis('crn')
+    print(new_df)
+
+    reg_data_filename = 'reg_data_' + short_sem + '.csv'
+    new_df.to_csv('cache/' + reg_data_filename, index=False)
+    put_file('/home/public/schedule/', 'cache/', reg_data_filename, 0)
+
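+# The wide file written above grows one timestamp column per run, roughly
+# like this (illustrative values only):
+#
+#   crn,2024-05-01T09,2024-05-02T09
+#   40001,12,15
+#   40002,8,9
+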
+# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
+def list_latestarts(term):
+
+    show_summary = 1
+
+    the_year = '20' + term[2:4]
+    print("year: ", the_year, " semester: ", term)
+
+    term_in = "cache/%s_sched.json" % term
+    term_out = "cache/%s_latestarts.txt" % term
+    expanded_out = "%s_sched_expanded.json" % term
+    print("Writing output to " + term_out)
+    infile = codecs.open(term_in, "r", "utf-8")
+    outfile = codecs.open(term_out, "w", "utf-8")
+    exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
+    expanded = []
+    sched = json.loads(infile.read())
+    infile.close()
+    by_date = {}
+
+    if show_summary: print("course \t loc \t type \t time")
+
+    for C in sched:
+        if (not C['type']) and C['loc'] != 'ONLINE':
+            C['type'] = 'in-person'
+
+        if show_summary: print("%s \t %s \t %s \t %s" % (C['code'], C['loc'], C['type'], C['time']))
+
+        if 'extra' in C:
+            if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
+                C['type'] = 'hybrid'
+
+        times = C['time'].split("-")
+        if len(times) > 1:
+            time_start = times[0]
+            time_end = times[1]
+
+            try:
+                startt = time.strptime(time_start, "%I:%M %p")
+                endt = time.strptime(time_end, "%I:%M %p")
+                min_start = startt.tm_min
+                min_end = endt.tm_min
+                if min_start == 0: min_start = "00"
+                else: min_start = str(min_start)
+                if min_end == 0: min_end = "00"
+                else: min_end = str(min_end)
+                C['time_start'] = "%i:%s" % (startt.tm_hour, min_start)
+                C['time_end'] = "%i:%s" % (endt.tm_hour, min_end)
+                #print("+ Parsed %s into %s and %s." % (C['time'], C['time_start'], C['time_end']))
+            except Exception as e:
+                print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
+        else:
+            C['time_start'] = ''
+            C['time_end'] = ''
+
+        if re.search('TBA', C['date']):
+            C['start'] = ''
+            C['end'] = ''
+            C['doy'] = ''
+            expanded.append(C)
+            continue
+
+        parts = C['date'].split("-")
+        start = parts[0] + "/" + the_year
+        end = parts[1] + "/" + the_year
+
+        try:
+            startd = parser.parse(start)
+            endd = parser.parse(end)
+            C['start'] = "%i-%i" % (startd.month, startd.day)
+            C['end'] = "%i-%i" % (endd.month, endd.day)
+            C['doy'] = startd.timetuple().tm_yday
+            expanded.append(C)
+        except Exception as e:
+            print(e, "\n-- problem parsing ", start, " or ", end)
+            continue   # can't bucket this course by start date
+        if startd not in by_date:
+            by_date[startd] = []
+        by_date[startd].append(C)
+
+    exoutfile.write(json.dumps(expanded, indent=2))
+    exoutfile.close()
+    put_file('/home/public/schedule/', 'cache/', expanded_out, 0)
+
+    for X in sorted(by_date.keys()):
+        if len(by_date[X]) < 200:
+            prettydate = X.strftime("%A, %B %d")
+            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
+            for Y in by_date[X]:
+                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\n")
+    outfile.close()
+    put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
+    return expanded
+
+
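+# Sketch for the archiving TODO inside scrape_schedule() below: rename the
+# previous sched_xxYY.json so it keeps its date before the new file is
+# written. Hypothetical helper -- not called anywhere yet.
+#
+# def archive_prev_sched(fn):
+#     import pathlib
+#     p = pathlib.Path('cache/' + fn)
+#     if not p.exists():
+#         return
+#     stamp = dt.fromtimestamp(p.stat().st_mtime).strftime('%Y-%m-%d')
+#     p.rename(p.with_name('%s.%s' % (fn, stamp)))
+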
+# Use Firefox to log in to SSB and fetch the full schedule.
+# Only works where Selenium (and Firefox) is installed.
+def scrape_schedule():
+    #url = "https://ssb.gavilan.edu/prod/twbkwbis.P_GenMenu?name=bmenu.P_StuMainMnu"
+    #url = "https://ssb-prod.ec.gavilan.edu/PROD/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"
+    url = "https://lum-prod.ec.gavilan.edu/"
+    text = ''
+
+    try:
+        driver = webdriver.Firefox()
+        driver.get(url)
+        driver.implicitly_wait(15)
+        driver.find_element("id", "usernameUserInput").clear()
+        driver.find_element("id", "usernameUserInput").send_keys(GOO)
+        driver.find_element("name", "password").send_keys(GOO_PIN)
+        # enter pw for mygav login
+        driver.find_element("xpath", "/html/body/div[1]/div/div/div/form/div[3]/div/button").click()
+        driver.implicitly_wait(10)
+
+        # click student tab
+        print(driver.title)
+        driver.find_element("xpath", "/html/body/div[2]/nav/div/div[1]/ul/li[3]/a/span").click()
+        driver.implicitly_wait(10)
+
+        # Set up an explicit wait for later
+        wait = WebDriverWait(driver, 10)
+        original_window = driver.current_window_handle
+
+        # click ssb button. Opens a new tab
+        driver.implicitly_wait(10)
+        driver.find_element("xpath", "/html/body/div[2]/div/div[3]/div/div[1]/div/div[1]/section/div/div/div/div[1]/div/div/div/h5/a").click()
+        driver.implicitly_wait(20)
+
+        # Wait for the new window or tab
+        wait.until(EC.number_of_windows_to_be(2))
+
+        # Switch to the window handle we didn't start on
+        for window_handle in driver.window_handles:
+            if window_handle != original_window:
+                driver.switch_to.window(window_handle)
+                break
+
+        print(driver.title)
+
+        driver.find_element(By.LINK_TEXT, "Students").click()
+        driver.implicitly_wait(20)
+        print(driver.title)
+
+        driver.find_element(By.LINK_TEXT, "Registration").click()
+        driver.implicitly_wait(10)
+        print(driver.title)
+
+        driver.find_element(By.LINK_TEXT, "Search for Classes").click()
+        driver.implicitly_wait(15)
+        print(driver.title)
+
+        dd = Select(driver.find_element("name", "p_term"))
+        dd.select_by_visible_text(SEMESTER)
+        driver.find_element("xpath", "/html/body/div/div[4]/form").submit()
+        driver.implicitly_wait(15)
+        print(driver.title)
+
+        driver.find_element("xpath", "/html/body/div/div[4]/form/input[18]").click()
+        driver.implicitly_wait(10)
+        print(driver.title)
+
+        driver.find_element("name", "SUB_BTN").click()
+        driver.implicitly_wait(40)
+        time.sleep(15)
+        driver.implicitly_wait(40)
+        print(driver.title)
+        text = driver.page_source
+        driver.quit()
+
+    except Exception as e:
+        print("Got an exception: ", e)
+        traceback.print_exc()
+        try:
+            driver.quit()
+        except Exception:
+            pass
+        return
+
+    codecs.open('cache/' + filename_html, 'w', 'utf-8').write(text)
+
+    as_list = ssb_to_csv(text)
+    as_dict = to_section_list(as_list)
+    jj = json.dumps(as_dict, indent=2)
+
+    # Diff against the previous run and log enrollment numbers
+    try:
+        print("Opening " + 'cache/' + filename)
+        ps = codecs.open('cache/' + filename, 'r', 'utf-8')
+        prev_sched = json.loads(ps.read())
+        ps.close()
+        print("ok")
+
+        if 1:  # sometimes I want to re-run this without affecting the logs
+            log_section_filling(as_dict)
+            log_section_filling2(as_dict)
+
+        dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
+        print("diff done")
+        pretty_json = json.dumps(json.loads(dd.to_json()), indent=2)
+        codecs.open('cache/%s_sched_diff.json' % short_sem, 'w', 'utf-8').write(pretty_json)
+
+    except Exception as e:
+        print(e)
+        print("Can't do diff?")
+
+    # Next, rename the prev sched_xxYY.json data file to have its date
+    # (see the archive_prev_sched() sketch above), make this new one, and
+    # then upload it to the website.
+    # Maybe even count the entries and do a little sanity checking.
+
+    try:
+        last_mod = time.ctime(os.path.getmtime('cache/' + filename))
+        print("Last modified: %s" % last_mod)
+
+        import pathlib
+        prev_stat = pathlib.Path('cache/' + filename).stat()
+        mtime = dt.fromtimestamp(prev_stat.st_mtime)
+        print(mtime)
+    except Exception:
+        print("Couldn't stat the previous schedule file.")
+
+    codecs.open('cache/' + filename, 'w', 'utf-8').write(jj)
+
+    put_file('/home/public/schedule/', 'cache/', filename, 0)   # /gavilan.edu/_files/php/
+
+    return as_dict
+
+
+def scrape_schedule_multi():
+
+    global SEMESTER, short_sem, semester_begin, filename, filename_html, sem_code
+
+    SEMESTER = 'Fall 2024'
+    sem_code = '202470'
+    short_sem = 'fa24'
+    semester_begin = strptime('08/26', '%m/%d')
+    filename = 'fa24_sched.json'
+    filename_html = 'fa24_sched.html'
+
+    as_dict = scrape_schedule()
+
+    expanded = list_latestarts(short_sem)
+    fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")
+
+    with codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8') as csvfile:
+        csvwriter = csv.writer(csvfile)
+        csvwriter.writerow(fields)
+
+        for S in expanded:
+            parts = S['code'].split(' ')
+            S['dept'] = parts[0]
+            S['num'] = parts[1]
+            S['gp'] = gp[parts[0]]
+            S['dean'] = dean[parts[0]]
+            S['sem'] = short_sem
+            if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
+            csvwriter.writerow([S[x] for x in fields])
+
+    put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)
+
+################
+################ SENDING DATA AWAY
+################
+################
+################
+
+# Upload a file to the www site over sftp
+def put_file(remotepath, localpath, localfile, prompt=1):
+    show_all = 0
+    folder = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
+    cnopts = pysftp.CnOpts()
+    cnopts.hostkeys = None
+    with pysftp.Connection(FTP_SITE, username=FTP_USER, password=FTP_PW, cnopts=cnopts) as sftp:
+        #todo: these paths
+        sftp.chdir(remotepath)
+        files = sftp.listdir()
+        if show_all: print(folder + "\tI see these files on remote: ", files, "\n")
+
+        localf = os.listdir(localpath)
+        if show_all: print("I see these local: ", localf)
+
+        if prompt:
+            input('ready to upload')
+        sftp.put(localpath + localfile, localfile, preserve_mtime=True)
+    print("Uploaded %s" % localfile)
+
+
+if __name__ == '__main__':
+    scrape_schedule_multi()
diff --git a/localcache2.py b/localcache2.py
index 1ecb47a..ff1c8ab 100644
--- a/localcache2.py
+++ 
b/localcache2.py @@ -125,7 +125,8 @@ def all_gav_employees(): ''' def user_from_goo(goo): - goo = "G00" + goo + if not goo.lower().startswith("g00"): + goo = "G00" + goo q = f"SELECT * FROM canvas.pseudonyms p JOIN canvas.users u ON p.user_id=u.id WHERE p.sis_user_id='{goo}';" (connection,cursor) = db() cursor.execute(q, None) # execute query with optional parameters diff --git a/outcomes.py b/outcomes.py index 62d8d3e..a6e9e64 100644 --- a/outcomes.py +++ b/outcomes.py @@ -1267,6 +1267,16 @@ def code_from_ilearn_name(n,verbose=0): if v: print("ilearn: ", stringpad(n, 35), "*dept: ", stringpad(dept1,6), ' num: ', stringpad(num1,7), ' code: ', stringpad(code,11), " crn: ", stringpad(crn,9), " R: 7", end='') return (dept1,code,crn) + # KIN64A/64B/64C/64D 3 or 4 + a = re.search('^([A-Z]+)(\d+)([A-Z])\/(\d+)([A-Z])\/(\d+)([A-Z])\/?(\d+)?([A-Z])?$', code) + if a: + dept1 = a.group(1) + num1 = a.group(2)+a.group(3) + num2 = a.group(4)+a.group(5) + code = dept1+num1 + if v: print("ilearn: ", stringpad(n, 35), "*dept: ", stringpad(dept1,6), ' num: ', stringpad(num1,7), ' code: ', stringpad(code,11), " crn: ", stringpad(crn,9), " R: 7", end='') + return (dept1,code,crn) + return (0,0,0) diff --git a/outcomes2022.py b/outcomes2022.py index 2c9af27..2241852 100644 --- a/outcomes2022.py +++ b/outcomes2022.py @@ -30,7 +30,7 @@ from path_dict import PathDict outputfile = '' csvwriter = '' -TERM = 183 +TERM = 184 def escape_commas(s): diff --git a/requirements.txt b/requirements.txt index 42701e9..41426cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ attrs==23.2.0 Automat==22.10.0 Babel==2.14.0 bcrypt==4.1.2 -beautifulsoup4==4.12.3 +beautifulsoup4 bidict==0.22.1 blinker==1.7.0 blis==0.7.11 @@ -46,6 +46,7 @@ docxtpl==0.16.7 durable-rules==2.0.28 et-xmlfile==1.1.0 executing==2.0.1 +fast_autocomplete[levenshtein] filelock==3.13.1 Flask==3.0.1 Flask-SocketIO==5.3.6 @@ -119,10 +120,10 @@ ordered-set==4.1.0 orjson==3.9.12 outcome==1.3.0.post0 packaging==23.2 -paho-mqtt==1.6.1 -pampy==0.3.0 -pandas==2.2.0 -paramiko==3.4.0 +paho-mqtt +pampy +pandas +paramiko parsel==1.8.1 parso==0.8.3 path-dict==4.0.0 @@ -145,8 +146,8 @@ pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 pycryptodome==3.20.0 -pydantic==2.6.1 -pydantic_core==2.16.2 +pydantic +pydantic_core PyDispatcher==2.0.7 pyfiglet==1.0.2 pygame==2.5.2 @@ -165,28 +166,28 @@ python-dateutil==2.8.2 python-docx==1.1.0 python-engineio==4.8.2 python-socketio==5.11.0 -pytz==2024.1 -pywin32==306 +pytz +pywin32 PyYAML==6.0.1 queuelib==1.6.2 redis==5.0.1 referencing==0.33.0 regex==2023.12.25 -requests==2.31.0 -requests-file==2.0.0 -requests-oauthlib==1.3.1 +requests +requests-file +requests-oauthlib rich==13.7.0 rpds-py==0.17.1 rsa==4.9 safetensors==0.4.2 schedule==1.2.1 -scikit-learn==1.4.0 -scipy==1.12.0 -Scrapy==2.11.0 +scikit-learn +scipy==1.10.1 +Scrapy seaborn==0.13.2 -selenium==4.17.2 +selenium sentence-transformers==2.3.1 -sentencepiece==0.1.99 +sentencepiece service-identity==24.1.0 simple-websocket==1.0.0 simpy==4.1.1 @@ -195,7 +196,7 @@ smart-open==6.4.0 sniffio==1.3.0 sortedcontainers==2.4.0 soupsieve==2.5 -spacy==3.7.2 +spacy spacy-legacy==3.0.12 spacy-loggers==1.0.5 SQLAlchemy==2.0.25 @@ -203,7 +204,7 @@ srsly==2.4.8 stack-data==0.6.3 statsmodels==0.14.1 striprtf==0.0.26 -sympy==1.12 +sympy tabulate==0.9.0 TatSu==5.11.3 tenacity==8.2.3 diff --git a/useful queries.sql b/useful queries.sql index e7efdf7..08c65d9 100644 --- a/useful queries.sql +++ b/useful queries.sql @@ -60,7 +60,7 @@ join canvas.users u on u.id=e.user_id join 
canvas.communication_channels cc on u.id=cc.user_id full outer join canvas.schedule s on c.id=s.canvascourse where (s.type='online' or s.type='hybrid' or s.type='online line') - and c.sis_source_id like '202450-%' + and c.sis_source_id like '202470-%' and cc.path_type='email' and not cc.path like '%noemail%' and not cc.path='sstaff@gavilan.edu' @@ -162,7 +162,7 @@ join canvas.enrollments e on u.id=e.user_id join canvas.courses c on e.course_id=c.id join canvas.schedule s on c.id=s.canvascourse join canvas.pseudonyms p on u.id=p.user_id -where s.sem='202430' and e.workflow_state='active' and e.type='StudentEnrollment' +where s.sem='202470' and e.workflow_state='active' and e.type='StudentEnrollment' group by u.sortable_name, p.sis_user_id order by total desc, online desc, onlinelive desc, hybrid desc; diff --git a/useful_flexday_queries.sql b/useful_flexday_queries.sql new file mode 100644 index 0000000..31c3bae --- /dev/null +++ b/useful_flexday_queries.sql @@ -0,0 +1,49 @@ + +use db; + +-- all users, every signup ever +select u.goo, lower(u.email) as email, u.name, u.id as userid, s.session as sessionid, s.certified_at, e.title, e.starttime, e.type, e.id as sesid, +e.parent from conf_users u +join conf_signups s on u.id = s.user +join conf_sessions e on e.id = s.session +order by lower(u.name), e.starttime; + + +-- all comments on all sessions by all users ever +select lower(u.email) as email, u.id, a.session, a.answer from conf_answers a +join conf_users u on a.user=u.id +where a.question=2 +order by lower(u.name), a.session; + + +-- sample INSERT +INSERT INTO `conf_sessions` (`id`, `title`, `desc`, `type`, `length`, `starttime`, `track`, `location`, `location_irl`, `mode`, `gets_survey`, `is_flex_approved`, `category`, `category_b`, `author`, `is_custom`, `parent`, `recording`, `instructions`, `benefit`, `image_url`, `cal_uid`) VALUES +(1386, 'Gav Connect Faculty Training', 'Gav Connect is a comprehensive online technology used to communicate with students about their academic progress and connect them with the campus support and guidance they need to succeed throughout their college journey. 
Come learn how you can use Gav Connect in your classroom to support student success with just a few clicks of a mouse!\r\n', 101, 1, '2024-01-26 13:00:00', 2, '', 'SS 206', 'inperson', 1, 1, 0, NULL, NULL, NULL, 1364, NULL, NULL, NULL, NULL, NULL); + +-- sample update +UPDATE conf_sessions set mode='online' where mode='zoom' and id>1386; + + +-- all sessions ever, with hosts +select s.id as sessionid, s.title, s.starttime, s.mode, s.parent, group_concat(u1.name separator ",") as hostname from conf_sessions s +join conf_hosts h on h.session=s.id +join conf_users u1 on h.host=u1.id +group by s.id +order by s.id desc; + + +-- all sessions ever, with signups +select s.id as sessionid, s.title, s.starttime, u.name from conf_sessions s +join conf_signups i on i.session=s.id +join conf_users u on u.id=i.user +order by s.id desc; + + +-- all sessions ever, with ratings, comments +select s.id as sessionid, s.title, s.starttime, u.name, a.answer, a.question from conf_sessions s +join conf_answers a on a.session=s.id +join conf_users u on u.id=a.user +order by s.id desc, a.question; + + + diff --git a/users.py b/users.py index 7e3c02d..9602268 100644 --- a/users.py +++ b/users.py @@ -1797,7 +1797,7 @@ def track_user(id=0,qid=0): if 1: # hard code dates start_date = "2024-01-01T00:00:00-07:00" - end_date = "2024-07-01T00:00:00-07:00" + end_date = dt.now().strftime("%Y-%m-%dT%H:%M:%S-07:00") # "2024-10-01T00:00:00-07:00" url_addition = f"?start_time={start_date}&end_time={end_date}" elif 'last_days_log' in info: print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id']))