spring 25 updates

This commit is contained in:
Peter Howell 2025-05-09 09:39:32 -07:00
parent 9bfab84aae
commit 68e48ec6ac
22 changed files with 4353 additions and 2298 deletions

calendarget.py (new file, 158 lines)

@ -0,0 +1,158 @@
from ast import FormattedValue
from googleapiclient.discovery import build
import datetime
from datetime import timedelta
from zoneinfo import ZoneInfo
import win32com.client
from canvas_secrets import GOOGLE_API_KEY
# Replace these with your own API key and Calendar ID.
calendars = {'peter_main':'peter.howell@gmail.com',
'aly_and_peter':'5qgh1nv9g5on3am4vres9i451c@group.calendar.google.com',
'tlc':'4aq36obt0q5jjr5p82p244qs7c@group.calendar.google.com',
'birthdays':'4q73r3ern2k9k83t0orq6iqaac@group.calendar.google.com'}
def to_my_timezone(d, md_table_format=0):
# Parse the datetime string into a timezone-aware datetime.
dt = datetime.datetime.fromisoformat(d)
# Convert to Pacific Time.
dt_pacific = dt.astimezone(ZoneInfo("America/Los_Angeles"))
# Format the datetime. Note:
# - %A: full weekday name (e.g., Thursday)
# - %B: full month name (e.g., April)
# - %d: day of the month (with leading zero, so we'll remove it later)
# - %I: hour in 12-hour format (with leading zero)
# - %M: minute (with leading zero)
# - %p: AM/PM indicator (will be in uppercase)
formatted = dt_pacific.strftime("%A, %B %d | %I:%M%p")
# Remove a leading zero from the day and hour if present
formatted = formatted.replace(" 0", " ")
# Convert the AM/PM indicator to lowercase
formatted = formatted.replace("AM", "am").replace("PM", "pm")
return formatted
#return dt_pacific.strftime("%Y-%m-%d %H:%M:%S %Z%z")
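# Illustrative example (hypothetical input): to_my_timezone("2025-05-09T16:30:00+00:00")
# comes back as "Friday, May 9 | 9:30am" after conversion to Pacific time.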
def in_my_timezone(d, md_table_format=0):
# Parse the datetime string into a timezone-aware datetime.
dt = datetime.datetime.fromisoformat(d)
# Convert to Pacific Time.
#dt_pacific = dt.astimezone(ZoneInfo("America/Los_Angeles"))
# Format the datetime. Note:
# - %A: full weekday name (e.g., Thursday)
# - %B: full month name (e.g., April)
# - %d: day of the month (with leading zero, so we'll remove it later)
# - %I: hour in 12-hour format (with leading zero)
# - %M: minute (with leading zero)
# - %p: AM/PM indicator (will be in uppercase)
formatted = dt.strftime("%A, %B %d | %I:%M%p")
# Remove a leading zero from the day and hour if present
formatted = formatted.replace(" 0", " ")
# Convert the AM/PM indicator to lowercase
formatted = formatted.replace("AM", "am").replace("PM", "pm")
return formatted
def gcal():
# Build the service using the API key.
service = build('calendar', 'v3', developerKey=GOOGLE_API_KEY)
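# Note: an API key alone can generally only read calendars that are shared publicly;
# private calendars would need OAuth credentials instead.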
n = 30
for name,id in calendars.items():
# Get the current time in RFC3339 format (UTC).
now = datetime.datetime.utcnow().isoformat() + 'Z'
print(f'Getting the upcoming {n} events')
events_result = service.events().list(
calendarId=id,
timeMin=now,
maxResults=n,
singleEvents=True,
orderBy='startTime'
).execute()
events = events_result.get('items', [])
if not events:
print(f'No upcoming events found for {name}.')
continue  # move on to the next calendar rather than stopping
print(f"| Date | Time | Event | Lead |")
print(f"|------|------|-------|------|")
for event in events:
# Depending on the event, the start time might be a date or dateTime.
start = event['start'].get('dateTime', event['start'].get('date'))
print(f"| {to_my_timezone(start,1)} | {event.get('summary', 'No Title')} | | |")
def ocal():
# Initialize Outlook COM object.
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
#outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
#print(outlook)
#print(dir(outlook))
#print(dir(outlook.Folders))
root_folder = outlook.Folders.Item(1)
print (f"Root folder: {root_folder.Name}")
#And to know the names of the subfolders you have:
#print("\nFolders:")
#for folder in root_folder.Folders:
# print (" " + folder.Name)
# Get the default calendar folder.
calendar_folder = outlook.GetDefaultFolder(9) # 9 refers to the Calendar folder
#print(calendar_folder)
#print(dir(calendar_folder))
#print(calendar_folder.Items)
items = calendar_folder.Items
print("Total items in Calendar:", items.Count)
# Define the time window for which to fetch events.
n = 14
now = datetime.datetime.now()
end = now + timedelta(days=n) # next n days
# Restrict the calendar items to the time window.
# The Outlook filter syntax uses dates in "mm/dd/yyyy hh:mm" format.
filter_start = now.strftime("%m/%d/%Y %H:%M")
filter_end = end.strftime("%m/%d/%Y %H:%M")
restriction = f"[Start] >= '{filter_start}' AND [End] <= '{filter_end}'"
calendar_items = calendar_folder.Items
calendar_items.IncludeRecurrences = True
calendar_items.Sort("[Start]")
#print(calendar_items)
print(f"Calendar items in next {n} days:")
restricted_items = calendar_items.Restrict(restriction)
for item in restricted_items:
#for item in calendar_items:
start_dt = item.Start # a COM datetime object
start = in_my_timezone(str(start_dt),1)
subject = item.Subject
print(f"{start} - {subject}")
if __name__ == '__main__':
ocal()

File diff suppressed because it is too large.


@ -1,14 +1,15 @@
from ast import Try
#from ast import Try, TryStar
import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd
from datetime import datetime
import pytz
from dateutil import parser
#from dateutil import parser
from datetime import datetime
#from symbol import try_stmt
from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name
from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
from pipelines import sems
from pipelines import fetch, fetch_stream, fetch_collapse, header, url
from schedules import get_semester_schedule
#from pipelines import sems
from localcache import course_quick_stats, get_courses_in_term_local, course_student_stats, all_sem_courses_teachers, full_reload
from localcache2 import db, users_new_this_semester, users_new_this_2x_semester, course_from_id, user_ids_in_shell
from collections import defaultdict
@ -399,7 +400,8 @@ def course_term_summary_local(term="180",term_label="FA23"):
from localcache2 import student_count, teacher_list, course_from_id, course_sched_entry_from_id
# Relevant stuff; trying to see if it's even being used or not
def course_term_summary(term="287",term_label="SP25"):
# relies on schedule being in database
def course_term_summary(term="289",term_label="FA25"):
print("Summary of %s" % term_label)
get_fresh = 1
courses = getCoursesInTerm(term, get_fresh, 0)
@ -490,7 +492,7 @@ GROUP BY c.code ORDER BY c.state, c.code""" % (S['id'],S['id'])
outp2.write("\n\n---------\nNOT PUBLISHED\n\n" + json.dumps(notpub, indent=2))
# Fetch all courses in a given term
def getCoursesInTerm(term=0,get_fresh=1,show=0,active=0): # a list
def getCoursesInTerm(term=0,get_fresh=1,show=1,active=0): # a list
if not term:
term = getTerms(1,1)
ff = 'cache/courses_in_term_%s.json' % str(term)
@ -590,15 +592,9 @@ def all_equal2(iterator):
return len(set(iterator)) <= 1
"""
180 2023 Fall
179 2023 Summer
178 2023 Spring
177 2023 Winter
"""
def semester_cross_lister():
sem = "sp25"
term = 287 #sp25
sem = "fa25"
term = 289
xlist_filename = f"cache/{sem}_crosslist.csv"
checkfile = codecs.open('cache/xlist_check.html','w','utf-8')
checkfile.write('<html><body><table>\n')
@ -700,6 +696,10 @@ def ez_xlist():
# Crosslist given 2 ids, computing the new name and code
def xlist(host_id, parasite_list):
host_info = course_from_id(host_id)
if not host_info:
print(f"Couldn't find course id {host_id} in database. Do you need to update it?")
return ""
host_info['crn'] = host_info['sis_source_id'][7:]
host_info['dept'] = dept_from_name( host_info['course_code'] )
host_info['num'] = num_from_name(host_info['course_code'] )
@ -709,6 +709,9 @@ def xlist(host_id, parasite_list):
para_info_list = [ course_from_id(x) for x in parasite_list ]
for p in para_info_list:
if not p:
print(f"Couldn't find course id for parasite in database. Do you need to update it?")
return ""
p['crn'] = p['sis_source_id'][7:]
p['dept'] = dept_from_name(p['course_code'] )
p['num'] = num_from_name(p['course_code'] )
@ -826,58 +829,69 @@ def course_term_summary_3():
# check number of students and publish state of all shells in a term
def all_semester_course_sanity_check():
outputfile = 'cache/courses_checker.csv'
t = 287
term = "sp25"
c = getCoursesInTerm(t,0,0)
c = getCoursesInTerm(t,1,0)
sched1 = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
sched = { x['crn']: x for x in sched1 }
#codecs.open('cache/courses_in_term_{t}.json','w','utf-8').write(json.dumps(c,indent=2))
#output = codecs.open('cache/courses_w_sections.csv','w','utf-8')
#output.write( ",".join(['what','id','parent_course_id','sis_course_id','name']) + "\n" )
output2 = codecs.open('cache/courses_checker.csv','w','utf-8')
output2 = codecs.open(outputfile,'w','utf-8')
output2.write( ",".join(['id','sis_course_id','name','state','mode','startdate','students']) + "\n" )
htmlout = codecs.open('cache/courses_checker.html','w','utf-8')
htmlout.write('<html><body><table>\n')
htmlout.write(f'<tr><td><b>Name</b></td><td><b>SIS ID</b></td><td><b>State</b></td><td><b>Mode</b></td><td><b>Start Date</b></td><td><b># Stu</b></td></tr>\n')
html_sections = []
i = 0
for course in c:
u2 = url + '/api/v1/courses/%s?include[]=total_students' % str(course['id'])
course['info'] = fetch(u2)
try:
u2 = url + '/api/v1/courses/%s?include[]=total_students' % str(course['id'])
course['info'] = fetch(u2)
# correlate to schedule
crn = course['sis_course_id'][7:]
ctype = '?'
cstart = '?'
ts = '?'
if crn in sched:
ctype = sched[crn]['type']
cstart = sched[crn]['start']
ts = sched[crn]['act']
# correlate to schedule
crn = course['sis_course_id'][7:]
ctype = '?'
cstart = '?'
ts = '?'
if crn in sched:
ctype = sched[crn]['type']
cstart = sched[crn]['start']
ts = sched[crn]['act']
info = [ 'course', course['id'], '', course['sis_course_id'], course['name'], course['workflow_state'], ts ]
info = list(map(str,info))
info2 = [ course['id'], course['sis_course_id'], course['name'], course['workflow_state'], ctype, cstart, ts ]
info2 = list(map(str,info2))
output2.write( ",".join(info2) + "\n" )
output2.flush()
print(info2)
#output.write( ",".join(info) + "\n" )
info = [ 'course', course['id'], '', course['sis_course_id'], course['name'], course['workflow_state'], ts ]
info = list(map(str,info))
info2 = [ course['id'], course['sis_course_id'], course['name'], course['workflow_state'], ctype, cstart, ts ]
info2 = list(map(str,info2))
output2.write( ",".join(info2) + "\n" )
output2.flush()
print(info2)
#output.write( ",".join(info) + "\n" )
uu = f"https://ilearn.gavilan.edu/courses/{course['id']}"
htmlout.write(f'<tr><td><a href="{uu}" target="_blank">{course["name"]}</a></td><td>{course["sis_course_id"]}</td><td>{course["workflow_state"]}</td><td>{ctype}</td><td>{cstart}</td><td>{ts}</td></tr>\n')
htmlout.flush()
#uu = url + '/api/v1/courses/%s/sections' % str(course['id'])
#course['sections'] = fetch(uu)
#s_info = [ [ 'section', y['id'], y['course_id'], y['sis_course_id'], y['name'], y['total_students'] ] for y in course['sections'] ]
#for row in s_info:
# print(row)
# output.write( ",".join( map(str,row) ) + "\n" )
#output.flush()
i += 1
#if i % 5 == 0:
# codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
uu = f"https://ilearn.gavilan.edu/courses/{course['id']}"
if course["workflow_state"]=='unpublished' and ctype=='online' and cstart=="1-27":
html_sections.append(f'<!--{course["name"]}--><tr><td><a href="{uu}" target="_blank">{course["name"]}</a></td><td>{course["sis_course_id"]}</td><td>{course["workflow_state"]}</td><td>{ctype}</td><td>{cstart}</td><td>{ts}</td></tr>\n')
#uu = url + '/api/v1/courses/%s/sections' % str(course['id'])
#course['sections'] = fetch(uu)
#s_info = [ [ 'section', y['id'], y['course_id'], y['sis_course_id'], y['name'], y['total_students'] ] for y in course['sections'] ]
#for row in s_info:
# print(row)
# output.write( ",".join( map(str,row) ) + "\n" )
#output.flush()
i += 1
#if i % 5 == 0:
# codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
except Exception as e:
print(f"error on {course}")
print(f"{e}")
#codecs.open('cache/courses_w_sections.json','w','utf-8').write(json.dumps(c,indent=2))
html_sections.sort()
for h in html_sections:
htmlout.write(h)
htmlout.write('</table></body></html>\n')
print(f"wrote to {outputfile}")
@ -999,7 +1013,9 @@ def unenroll_student(courseid,enrolid):
def enroll_id_list_to_shell(id_list, shell_id, v=0):
id_list = set(id_list)
# id list has pairs, [id,name]
id_list = set([i[0] for i in id_list])
existing = course_enrollment(shell_id) # by user_id
existing_ids = set( [ x['user_id'] for x in existing.values() ])
@ -1181,12 +1197,28 @@ def enroll_bulk_students_bydept(course_id, depts, the_term="172", cautious=1):
def enroll_gott_workshops():
# stupid gav tls broken
#r = requests.get("https://www.gavilan.edu/staff/tlc/db.php?a=signups")
r = requests.get("https://www.gavilan.edu/staff/tlc/signups.php")
text = r.text
# Regex to extract the JSON object
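# The page is expected to embed something like this (illustrative shape, not real data):
#   var signups = [{"name": "...", "email": "...", "training": "...", "date_rsvp": "..."}];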
match = re.search(r"var\s+signups\s*=\s*(\[\{.*?\}\]);", text, re.DOTALL)
if match:
json_str = match.group(1) # Extract the JSON string
try:
signups = json.loads(json_str) # Convert to Python list of dicts
#print(json.dumps(signups,indent=2))
except json.JSONDecodeError as e:
print("Error decoding JSON:", e)
return
else:
print("JSON object not found")
return
#signups = json.loads(r.text)
#signups = json.loads(codecs.open('cache/signups.json','r','utf-8').read())
all_staff = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
# update w/ users.py #1
all_staff = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
@ -1212,8 +1244,13 @@ def enroll_gott_workshops():
#'6/10-6/23 GOTT 5: Essentials of Blended Learning, Hyflex2024-06-10 12:00:00': 20568,
#'6/17-6/30 GOTT 6 Introduction to Live Online Teaching and Learning2024-06-17 12:00:00': 20569,
#'GOTT 1 Intro to Teaching Online AUG242024-07-29 12:00:00': 20603, # 7/29
['2025-01-01 16:00:00 GOTT 1: Intro to Teaching Online with Canvas', 21770, 'enroll_gott1.txt'],
['2025-01-01 16:00:00 GOTT 2: Introduction to Asynchronous Teaching and Design', 21772, 'enroll_gott2.txt']
#['2025-01-01 16:00:00 GOTT 1: Intro to Teaching Online with Canvas', 21770, 'enroll_gott1.txt'],
#['2025-01-01 16:00:00 GOTT 2: Introduction to Asynchronous Teaching and Design', 21772, 'enroll_gott2.txt']
# date, title, shell_id
#['2025-02-23 16:00:00', 'GOTT 6: Intro to Synchronous Teaching (Sync/Hyflex)', 21835],
['2025-03-14 17:00:00', 'GOTT 5: The Essentials of Blended Learning (Hybrid) ', '21886'],
#['2025-02-23 16:00:00', 'GOTT 1: Intro to Teaching Online (2 week, async)', 21874]
]
#print(json.dumps(signups,indent=4))
#print(json.dumps(by_email,indent=4))
@ -1236,31 +1273,30 @@ def enroll_gott_workshops():
'rpotter@gav.edu': 'rpotter@gavilan.edu',
}
#for wkshp,su_list in signups.items():
print(workshop_ids)
for each_workshop in workshop_ids:
#if wkshp not in workshop_ids:
# print(f"skipping {wkshp}")
# continue
wkshp, shell_id, student_list = each_workshop
wkshp_date, wkshp_title, wkshp_shell_id = each_workshop
to_enroll = []
from_file = [ L.strip().split(' - ') for L in codecs.open(f'cache/{student_list}', 'r', 'utf-8').readlines() ]
#from_file = [ L.strip().split(' - ') for L in codecs.open(f'cache/{student_list}', 'r', 'utf-8').readlines() ]
#print(from_file)
for s in from_file:
e = s[1].lower()
if e in subs:
e = subs[e]
print( f"{wkshp} {e} {s[0]}" )
if e in by_email:
user = by_email[e]
#print(f"\t{user['name']} {e} {user['login_id']}")
to_enroll.append(user['id'])
else:
#print("** ** NOT FOUND")
pass
print(f"Workshop: {wkshp} \n\tEnrolling: {str(to_enroll)}")
enroll_id_list_to_shell(to_enroll, shell_id)
for s in signups:
if wkshp_date == s['date_rsvp'] and wkshp_title == s['training']:
e = s['email'].lower()
if e in subs:
e = subs[e]
print( f"{wkshp_title} {e} {s['name']}" )
if e in by_email:
user = by_email[e]
#print(f"\t{user['name']} {e} {user['login_id']}")
to_enroll.append([user['id'],user['name']])
else:
#print("** ** NOT FOUND")
pass
print(f"Workshop: {wkshp_date} {wkshp_title} \n\tEnrolling: {', '.join(i[1] for i in to_enroll)}")
enroll_id_list_to_shell(to_enroll, wkshp_shell_id)
def enroll_gnumber_list_to_courseid():
infile = codecs.open('cache/gottenrollments.txt','r','utf-8').readlines()
@ -1312,7 +1348,7 @@ def enroll_orientation_students():
# users_to_enroll = users_new_this_semester(the_semester) ### ##### USES LOCAL DB
# double semester (SU + FA)
users_to_enroll = users_new_this_2x_semester("202510", "202530") ##### USES LOCAL DB
users_to_enroll = users_new_this_2x_semester("202550", "202570") ##### USES LOCAL DB
#print("ALL ORIENTATION STUDENTS %s" % str(users_to_enroll))
#print("\n\nALREADY IN ORI SHELL %s" % str(users_in_ori_shell))
@ -1406,6 +1442,138 @@ def course_search_by_sis():
# print(json.dumps(x, indent=2))
def set_custom_start_dates():
TERM = 288
SEM = "su25"
make_changes = 1
do_all = 0
get_fresh = 0
term_start_month = 6
term_start_day = 2
# just do certain ids in cache/changeme.txt
limit_to_specific_ids = 1
limit_to = [x.strip() for x in open('cache/changeme.txt','r').readlines()]
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
if limit_to_specific_ids and (not this_id in limit_to):
continue
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
print(f" - {start} {d_start} - id: {this_id} - {S['code']} {S['crn']} {S['name']}" )
# Do we adjust the start date? Only if it doesn't match term
if d_start.month == term_start_month and d_start.day == term_start_day:
print(" Ignoring, term start date" )
continue
else:
print(" Adjust course start day?")
if make_changes:
if do_all != 'a':
do_all = input(' -> adjust? [enter] for yes, [a] to do all remaining. [n] to quit. >')
if do_all == 'n':
exit()
if do_all == '' or do_all == 'a':
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = f"https://gavilan.instructure.com:443/api/v1/courses/{this_id}"
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
def overview_start_dates():
TERM = 288
SEM = "su25"
get_fresh = 1
term_start_month = 6
term_start_day = 2
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
print(f"id,shell_shortname,sched_start,shell_start,shell_end,shell_restrict_view_dates,shell_restrict_view_dates,shell_state,shell_numstudents" )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
# get more canvas course shell info
uu = f"{url}/api/v1/courses/{this_id}"
this_course = fetch(uu)
shell_start = this_course['start_at']
shell_end = this_course['end_at']
shell_restrict_view_dates = '?'
if 'access_restricted_by_date' in this_course:
shell_restrict_view_dates = this_course['access_restricted_by_date']
shell_shortname = this_course['course_code']
shell_numstudents = '?' #this_course['total_students']
shell_state = this_course['workflow_state']
print(f"{this_id},{shell_shortname},{d_start},{shell_start},{shell_end},{shell_restrict_view_dates},{shell_restrict_view_dates},{shell_state},{shell_numstudents}" )
@ -1426,6 +1594,7 @@ def course_by_depts_terms(section=0):
nursing_start_day = 0
spring_start_day = 27
# get list of online course shells
if get_fresh:
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,get_fresh,0)
@ -1433,6 +1602,7 @@ def course_by_depts_terms(section=0):
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
# dict to match section numbers between shells and schedule
crn_to_canvasid = {}
for C in c:
if 'sis_course_id' in C and C['sis_course_id']:
@ -1441,16 +1611,14 @@ def course_by_depts_terms(section=0):
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
#print(crn_to_canvasid)
#return
#s = json.loads( codecs.open(f'cache/{SEM}_sched_expanded.json','r','utf-8').read() )
# get course info from schedule
s = requests.get(f"http://gavilan.cc/schedule/{SEM}_sched_expanded.json").json()
for S in s:
# get dates
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
# try to find online shell matching this schedule entry
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
@ -1514,11 +1682,11 @@ def xlist_cwe():
# cwe192 get put into another shell
this_sem_190_id = 21606 # they get 190s and 290s
this_sem_192_id = 21610 # they get 192s
this_sem_term = 287
this_sem_190_id = 22890 # they get 190s and 290s
this_sem_192_id = 22894 # they get 192s
this_sem_term = 289
get_fresh = 0
get_fresh = 1
sem_courses = getCoursesInTerm(this_sem_term, get_fresh, 0)
for search_string in ['CWE190','WTRM290']:
@ -1630,8 +1798,9 @@ def create_sandboxes():
#(19223, ' Sandbox GOTT5 WI24'),
#(19224, ' Sandbox GOTT6 WI24'),
#(20761, ' Sandbox GOTT1 FA24'),
(21770, ' Sandbox GOTT1 WI25'),
(21772, ' Sandbox GOTT2 WI25')
#(21770, ' Sandbox GOTT1 WI25'),
#(21772, ' Sandbox GOTT2 WI25'),
(21874, ' Sandbox GOTT1 SP25'),
]
filepath = 'cache/sandbox_courses.pkl'
@ -1871,8 +2040,8 @@ def instructor_list_to_activate_evals():
def add_evals(section=0):
# show or hide?
TERM = 184
SEM = "fa24"
TERM = 287
SEM = "sp25"
# fetch list of courses?
GET_FRESH_LIST = 0
@ -1887,7 +2056,7 @@ def add_evals(section=0):
ASK = 0
# are we showing or hiding the course eval link?
HIDE = False
HIDE = True
s = [ x.strip() for x in codecs.open(f'cache/{SEM}_eval_sections.txt','r').readlines()]
@ -1922,6 +2091,7 @@ def add_evals(section=0):
print(f"{courses[i]['id']} / {courses[i]['name']}")
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s/tabs/context_external_tool_1953" % i
r3 = requests.put(u2, headers=header, params=data)
print(f"OK {u2}")
#print(r3.text)
#time.sleep(0.400)
@ -2059,9 +2229,8 @@ def my_nav_filter(row):
def clean_course_nav_setup_semester(section=0):
print("Fetching list of all active courses")
term = 184 # fa24 # 182
term = 287
c = getCoursesInTerm(term,1,0) # sp25 = 287 wi24=182
term = 289
c = getCoursesInTerm(term,1,0)
print(c)
ids = []
courses = {}
@ -2220,35 +2389,36 @@ def quick_sem_course_list(term=180):
def create_calendar_event():
events = codecs.open('cache/events.csv','r','utf-8').readlines()
events = codecs.open('cache/academic_calendar_2025.csv','r','utf-8').readlines()
for e in events:
(date, title, desc) = e.split(',')
local = pytz.timezone("America/Los_Angeles")
naive = datetime.strptime(date, "%Y-%m-%d")
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc).isoformat()
orientation_shells = ["course_15924","course_19094","course_20862"]
for ori_shell in orientation_shells:
for e in events:
(date, title, desc) = e.split(',')
local = pytz.timezone("America/Los_Angeles")
naive = datetime.strptime(date, "%Y-%m-%d")
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc).isoformat()
params = {
"calendar_event[context_code]": "course_15924", # 2023 student orientation
"calendar_event[context_code]": "course_19094", # 2024 orientation
"calendar_event[title]": title,
"calendar_event[description]": desc,
"calendar_event[start_at]": utc_dt, # DateTime
"calendar_event[all_by_dept": "true",
params = {
"calendar_event[context_code]": ori_shell,
"calendar_event[title]": title,
"calendar_event[description]": desc,
"calendar_event[start_at]": utc_dt, # DateTime
"calendar_event[all_by_dept": "true",
}
}
u = url + "/api/v1/calendar_events"
res = requests.post(u, headers = header, params=params)
result = json.loads(res.text)
print(title,end=" ")
if "errors" in result:
print(result["errors"])
if "id" in result:
print("ok, id#", result["id"])
u = url + "/api/v1/calendar_events"
res = requests.post(u, headers = header, params=params)
result = json.loads(res.text)
print(title,end=" ")
if "errors" in result:
print(result["errors"])
if "id" in result:
print("ok, id#", result["id"])
def utc_to_local(utc_str):
if not utc_str: return ""
@ -2327,12 +2497,12 @@ def enrollment_helper():
keep = 'code,name,days,cap,act,teacher,date,partofday,type,site'.split(',')
oo = codecs.open('cache/section_history.json','w','utf-8')
# fetch enrollment stats for last few years
from semesters import code, sems, to_sis_sem
from semesters import code, sems_by_short_name, short_to_sis
from util import dept_from_name
raw = []
code.reverse()
sort = defaultdict(dict)
for s in sems.keys():
for s in sems_by_short_name.keys():
try:
sched1 = requests.get(f"http://gavilan.cc/schedule/{s}_sched_expanded.json").json()
sort[s] = defaultdict(dict)
@ -2340,7 +2510,7 @@ def enrollment_helper():
if sect['name'] in ignore2:
continue
sect_smaller = funcy.project(sect,keep)
sect_smaller['sem'] = to_sis_sem(s)
sect_smaller['sem'] = short_to_sis(s)
if int(sect_smaller['cap'])==0 or int(sect_smaller['act'])==0:
sect_smaller['fill_pct'] = 100
else:
@ -2447,6 +2617,33 @@ def course_log():
L = fetch(f"{url}/api/v1/audit/course/courses/{course_id}")
print(json.dumps(L,indent=2))
def fetch_rubric():
course = 21274
r_id = 35961
u = f"{url}/api/v1/courses/{course}/rubrics/{r_id}"
result = fetch(u)
#print(json.dumps(result,indent=2))
rows = []
for row in result['data']:
r = []
r.append(f"<td style='vertical-align:top;'><b>{row['description']}</b><br />{row['long_description']}</td>")
for item in row['ratings']:
r.append(f"<td style='vertical-align:top;'><u>{item['description']}</u><br />{item['long_description']}<br /><i>{item['points']} points</i></td>")
rows.append("<tr>" + "\n".join( r ) + "</tr>\n")
output = f"<h3>{result['title']}</h3>\n"
output += "<table border='1'>" + ''.join( [ f"<tr>{x}</tr>\n" for x in rows] ) + "</table>\n"
print(output)
if __name__ == "__main__":
options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
@ -2476,9 +2673,11 @@ if __name__ == "__main__":
17: ['Remove "new analytics" from all courses navs in a semester', remove_n_analytics],
21: ['Add course evals', add_evals],
56: ['Remove course evals all sections', remove_evals_all_sections],
52: ['Cleanup semester / course nav', clean_course_nav_setup_semester],
52: ['Cleanup semester / course nav', clean_course_nav_setup_semester], # not done, just lists nav right now
29: ['Overview summer start dates',overview_start_dates],
31: ['Fine tune term dates and winter session', course_by_depts_terms],
32: ['Set summer start dates', set_custom_start_dates],
#32: ['Cross-list classes', xlist ],
#33: ['Cross list helper', eslCrosslister],
33: ['Cross list, ask for sections', ez_xlist],
@ -2486,7 +2685,7 @@ if __name__ == "__main__":
35: ['Cross list from manually created file', do_manual_xlist],
36: ['Quick course list', quick_sem_course_list ],
37: ['Cross list CWE courses', xlist_cwe],
38: ['Create calendar event', create_calendar_event],
38: ['Create calendar events for orientation shells', create_calendar_event],
39: ['list all assignments', list_all_assignments],
40: ['Enroll GOTT Workshops', enroll_gott_workshops],
@ -2505,7 +2704,9 @@ if __name__ == "__main__":
50: ['Fetch rubric scores and comments', fetch_rubric_scores],
51: ['Fetch announcements in a course', fetch_announcements],
57: ['show course audit log', course_log]
57: ['show course audit log', course_log],
60: ['fetch a rubric', fetch_rubric],
}
print ('')


@ -15,6 +15,7 @@ displaynames = []
from canvas_secrets import cq_user, cq_pasw
from outcomes import quick_add_course_outcomes
from schedules import campus_dept_hierarchy
CQ_URL = "https://secure.curricunet.com/scripts/webservices/generic_meta/clients/versions/v4/gavilan.cfc"
@ -569,6 +570,7 @@ def course_path_style_2_html():
verbose = 1
v = verbose
dbg = codecs.open('cache/courses/debugout.txt','w','utf-8')
oo = codecs.open("cache/courses/allclasspaths.txt","r","utf-8").readlines()
course_prebuild = defaultdict( ddl )
@ -601,8 +603,8 @@ def course_path_style_2_html():
lookup_table = { 'entityTitle':'title', 'proposalType':'type',
'\/Course\sDescription\/status':'status', 'Course\sDiscipline':'dept',
'Course\sNumber':'number', 'Course\sTitle':'name',
'Short\sTitle':'shortname', 'Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
'Course\sNumber':'number', 'Course\sTitle':'name', 'Course Description\/\d\/Justification':'justification',
'Short\sTitle':'shortname', 'Course Description\/\d\/Internal\sProcessing\sTerm':'term', 'This\sCourse\sIs\sDegree\sApplicable':'degree_applicable',
'\/Course\sDescription\/\d+\/Course\sDescription\/':'desc',
'Minimum\sUnits':'min_units', 'Minimum\sLecture\sHour':'min_lec_hour', 'Minimum\sLab\sHour':'min_lab_hour', 'Course\shas\svariable\shours':'has_var_hours',
'Number\sWeeks':'weeks',
@ -620,6 +622,7 @@ def course_path_style_2_html():
crs = course_prebuild[C]
course_build = {'slo':{}} # defaultdict( ddl )
if v: print(C)
dbg.write(f"{C}\n")
for K in crs.keys():
if v: print("\t%s" % K)
@ -647,6 +650,7 @@ def course_path_style_2_html():
else:
content_search = re.search(r'^(.*)\/(.*?)$',line)
course_build[key] = content_search.groups()[1]
dbg.write(f"{key} => {content_search.groups()[1]}\n")
if v: print("\t\t%s - %s" % (key, course_build[key]))
continue
@ -841,8 +845,8 @@ def course_rank():
all[code].add(c)
for k in sorted(all.keys()):
print("\n##",k)
print(json.dumps(list(all[k]),indent=2))
#print("\n##",k)
#print(json.dumps(list(all[k]),indent=2))
for version in all[k]:
csvwriter.writerow( [ version['d']+version['n'], version['c'], version['s'], version['m'], version['d'], version['n'], len(version['o']) ])
@ -851,19 +855,127 @@ def de_classpaths():
outfile = codecs.open('cache/courses/all_de_classpaths.txt', 'w','utf-8')
areas = ['Distance Education/1/2/Justification/Need/Justification','/Distance Education/1/3/Content Presentation/<b>A. Methods of Instruction</b>/','/Distance Education/1/3/Content Presentation/<b>B. Instructional Materials and Resources:</b><br/>1. What materials and resources will you provide your students <b>in a virtual environment</b>?/','/Distance Education/4/Assessment/','/Distance Education/4/Methods of Instruction/','/Distance Education/1/3/Content Presentation/2. Have you assessed the use of high-quality open educational resources (OER) to help bridge the digital divide for students in the course? If so, please describe how you will be using them./','/Distance Education/4/Instructional Materials and Resources/','/Distance Education/1/3/Content Presentation/3. How will students be provided access to library materials and other learning resources <b>in a virtual environment</b>? (virtual reference librarian, research guides, digital content, etc.)/','/Distance Education/4/<b>How will students be provided access to library materials and what support will students be provided to help them locate and use these materials?</b><br/>Library and Other Learning Resources/','/Distance Education/1/3/Content Presentation/4. How will students access equitable student support services <b>in a virtual environment</b>? (tutoring, financial aid, counseling, etc.)/','/Distance Education/4/Accommodations for Students with Disabilities/','/6/Distance Education/4/Office Hours/','/Contact/Contact/Description/']
i = 0
for area in areas:
with codecs.open('cache/courses/allclasspaths.txt', 'r','utf-8') as infile:
outfile.writelines(line for line in infile if area in line)
i += 1
if i % 1000 == 0: print(i)
from semesters import human_to_sis, get_previous_season
#from pipelines import area, areas
def extract_digits(input_string):
"""
Removes all non-digit characters from the input string and returns an integer.
:param input_string: The string to process.
:return: An integer containing only the digits from the input string.
"""
digits_only = ''.join(char for char in input_string if char.isdigit())
return int(digits_only) if digits_only else 0
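# e.g. extract_digits("CSIS 101A") returns 101; extract_digits("") returns 0.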
def filter_classes(): # for removing deactivated classes
json_file_path = 'cache/courses/courses_built.json'
output_csv_path = 'cache/courses/active_courses.txt'
all_courses = []
with open(json_file_path, 'r') as json_file:
data = json.load(json_file)
for i,C in data.items():
term = ''
try:
term = C['term']
except:
print(f"** {i} {C['dept']} {C['number']} is missing term")
term = ''
shortname = ''
try:
shortname = C['shortname']
except:
shortname = C['name']
print(f"** {i} {C['dept']} {C['number']} is missing shortname")
all_courses.append(f"{C['dept']} {C['number']} {shortname} \t {C['status']} {C['type']} \t{term} - {i}")
all_courses.sort()
for C in all_courses: print(C)
def slo_summary_report(): # for scheduling slo assessment
json_file_path = 'cache/courses/courses_built.json'
output_csv_path = 'cache/courses/courses_slo_schedule.csv'
term_csv_file_path = 'cache/courses/slo_schedule.csv'
(gp, course_to_area, areacode_to_area, area_to_dean, dean, dean_code_to_name) = campus_dept_hierarchy()
with open(json_file_path, 'r') as json_file:
data = json.load(json_file)
# Extract course information
courses = []
term_courses = []
for key, course in data.items():
try:
#print(f"{course['dept']} - -" )
re_code_course = {
"key": key,
"type": course.get("type", ""),
"status": course.get("status", ""),
"dept": course.get("dept", ""),
"number": course.get("number", ""),
"name": course.get("name", ""),
"first_active_term": course.get("term", ""),
'first_active_term_code': human_to_sis(course.get('term', '')),
"reviewing_term": get_previous_season(course.get("term","")),
"reviewing_term_code": human_to_sis(get_previous_season(course.get('term', ''))),
"area": areacode_to_area[ course_to_area[course.get("dept", "").upper()] ]
}
courses.append(re_code_course)
if course["status"] in ["Active", "In Review"] and course["type"] != "Deactivate Course":
term_courses.append(re_code_course)
except Exception as e:
print(f"error on course: {course['dept']} {course['number']} {course['name']}")
# Sort by dept, number, and term
courses.sort(key=lambda x: (x["dept"], extract_digits(x["number"]), x["reviewing_term_code"]))
term_courses.sort(key=lambda x: (x["reviewing_term_code"],x["dept"], extract_digits(x["number"])))
# Write to CSV
fieldnames = ["dept", "number", "reviewing_term", "reviewing_term_code", "status", "key", "type", "name", "first_active_term", "first_active_term_code","area"]
with open(output_csv_path, 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(courses)
with open(term_csv_file_path, 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(term_courses)
print(f"CSV file '{output_csv_path}' has been created.")
if __name__ == "__main__":
print ('')
options = { 1: ['fetch all courses', fetch_all_classes],
2: ['process all classes', path_style_test],
3: ['courses - path style to html catalog', course_path_style_2_html],
3: ['courses - path style to json and html catalog', course_path_style_2_html],
4: ['show course outcomes', all_outcomes],
5: ['courses - rank by all versions', course_rank],
6: ['extract de info from class paths', de_classpaths],
7: ['build schedule or summary for SLO planning', slo_summary_report],
8: ['remove deactivated courses', filter_classes],
10: ['fetch all programs', fetch_all_programs],
11: ['process all programs', path_style_prog],
12: ['programs - path style to html catalog', path_style_2_html],


@ -11,7 +11,7 @@ from pampy import match, _
from bs4 import BeautifulSoup as bs
import pandas as pd
import sys, locale, re
from pipelines import getSemesterSchedule
from schedules import get_semester_schedule
from canvas_secrets import cq_url, cq_user, cq_pasw
@ -1360,7 +1360,7 @@ def my_default_counter():
# Of the recent schedules, what was actually offered online?
def summarize_online_sections():
scheds = list(map(getSemesterSchedule,sems))
scheds = list(map(get_semester_schedule,sems))
all = pd.concat(scheds,sort=True)
selected = all[['code','type','sem']]
selected.to_csv('cache/one_year_course_sections.csv')


@ -5,6 +5,358 @@
# from pipelines - canvas data
# read schedule file with an eye toward watching what's filling up
def schedule_filling():
sem = 'spring2021' # todo: hardcoded
days = []
for f in sorted(os.listdir('cache/rosters/'+sem+'/')):
if f.endswith('.html'):
match = re.search(r'sched_(\d\d\d\d)_(\d\d)_(\d+)\.html',f)
if match:
print(f)
y = match.group(1)
m = match.group(2)
d = match.group(3)
print("Schedule from %s %s %s." % (y,m,d))
csv_sched = ssb_to_csv(open('cache/rosters/'+sem+'/'+f,'r').read())
jsn = to_section_list(csv_sched)
#print(json.dumps(jsn,indent=2))
days.append(jsn)
day1 = days[-2]
day2 = days[-1]
df = jsondiff.diff(day1, day2)
gains = defaultdict( list )
for D in df.keys():
if isinstance(D, int):
#print(day1[D]['code'] + '\t' + day1[D]['crn'] + ' Before: ' + day1[D]['act'] + ' After: ' + day2[D]['act'])
try:
gain = int(day2[D]['act']) - int(day1[D]['act'])
gains[gain].append( day1[D]['code'] + ' ' + day1[D]['crn'] )
except:
print("No gain for " + str(D))
#print("\t" + str(df[D]))
else:
print(D)
print(df[D])
for key, value in sorted(gains.items(), key=lambda x: x[0]):
print("{} : {}".format(key, value))
#print(json.dumps(gains,indent=2))
def argos_data():
global dean,gp
f2 = codecs.open('cache/enrollment_argos_fa23.csv','w','utf-8')
writer = csv.writer(f2)
headers = 'gp dean dept num code crn name act site'.split(' ')
writer.writerow(headers)
f = codecs.open('cache/sched_draft_fa23.csv','r','utf-8')
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
print(d)
my_dean = dean[d['Subj']]
my_gp = gp[d['Subj']]
dept = d['Subj']
num = d['Crse No']
code = dept + " " + num
crn = d['CRN']
name = d['Course Title']
act = d['Open Seats']
campus = d['Campus']
session = d['Session']
if campus == "Off Campus": site = session
else: site = campus
print(site)
writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
def scrape_for_db():
global SEMESTER, gp, dean, short_sem, semester_begin, filename, filename_html
fields = 'sem,crn,dept,num,gp,dean,code,name,teacher,type,cap,act,loc,site,date,days,time,cred,ztc'.split(',')
"""
SEMESTER = 'Fall 2022'
short_sem = 'fa22'
semester_begin = strptime('08/22', '%m/%d')
filename = 'fa22_sched.json'
filename_html = 'fa22_sched.html'
as_dict = scrape_schedule()
fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text,mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
for S in as_dict:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = short_sem
str = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
", ".join( [ "'" + re.sub(r"'", "", S[x]) + "'" for x in fields ] )
print(str)
fff.write(str)
fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
fff.close()
"""
SEMESTER = 'Spring 2023 (View only)'
short_sem = 'sp23'
semester_begin = strptime('01/30', '%m/%d')
filename = 'sp23_sched.json'
filename_html = 'sp23_sched.html'
as_dict = scrape_schedule()
fff = codecs.open('cache/%s_sched.sql' % filename, 'w', 'utf-8')
fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text,mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text);\n")
for S in as_dict:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = short_sem
str = "INSERT INTO schedule (sem,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc) VALUES (%s);\n" % \
", ".join( [ "'" + re.sub(r"'", "", S[x]) + "'" for x in fields ] )
print(str)
fff.write(str)
fff.write('UPDATE schedule SET site="OnlineLive" WHERE loc="ONLINE LIVE";\n')
fff.close()
def todays_date_filename(short_sem): # helper
n = datetime.now()
m = n.month
if m < 10: m = "0"+str(m)
d = n.day
if d < 10: d = "0" + str(d)
return "reg_" + short_sem + "_" + str(n.year) + str(m) + str(d)
def expand_old_semesters():
terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20,fa20,sp21,su21,fa21,sp22,su22,fa22'.split(',')
terms = 'sp16,su16,fa16,sp17,su17,fa17,sp18,su18,fa18,sp19,su19,fa19,sp20,su20'.split(',')
terms.reverse()
for t in terms:
list_latestarts(t)
input('press return to continue.')
def argos_data_from_cvc():
global dean,gp
short_sem = 'fa23'
f3 = codecs.open('cache/%s_sched.json' % short_sem, 'w', 'utf-8')
all_courses = []
f = codecs.open('cache/sched_draft_%s.csv' % short_sem, 'r','utf-8')
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
#print(d)
parts = re.search(r'^([A-Z]+)(\d+[A-Z]*)$', d['Course_Code'])
if parts:
dept = parts.group(1)
num = parts.group(2)
my_dean = dean[dept]
my_gp = gp[dept]
code = dept + " " + num
crn = d['CRN']
cred = d['Units_Credit_hours']
days, time_start, time_end = days_times(d['Meeting_Days_and_Times'])
times = ""
if time_start: times = time_start + "-" + time_end
date = remove_year(d['Start_Date']) + "-" + remove_year(d['End_Date'])
start = remove_year(d['Start_Date'])
end = remove_year(d['End_Date'])
ztc = d['ZTC']
name = d['Course_Name']
cap = d['Class_Capacity']
rem = d['Available_Seats']
act = int(cap) - int(rem)
teacher = d['Instructor_First_Name'] + " " + d['Instructor_Last_Name']
delivery = d['Delivery']
if delivery == "Online":
if days:
site = "Online"
type = "online live"
loc = "Online Live"
else:
site = "Online"
type = "online"
loc = "ONLINE"
elif delivery == "Hybrid":
site = d['Campus_College']
type = "hybrid"
loc = d['Meeting_Locations']
else:
site = d['Campus_College']
type = "in-person"
loc = d['Meeting_Locations']
this_course = { "crn": crn, "dept": dept, "num": num, "code": code, "name": name, "teacher": teacher, "type": type, "loc": loc, \
"cap": cap.strip(), "act": act, "site": site, "date": date, "cred": cred.strip(), "ztc": ztc, "days": days, "time": times, \
"start": start, "end": end, "time_start": time_start, "time_end": time_end, "dean": my_dean, "gp": my_gp}
all_courses.append(this_course)
print(site)
#writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
print(all_courses)
#print(json.dumps(all_courses))
f3.write( json.dumps(all_courses,indent=2) )
f3.close()
expanded = list_latestarts(short_sem)
def days_times(s):
parts = re.search(r'^([MTWThRF]+)\s?(.*?)$',s)
if parts:
day = parts.group(1)
time = parts.group(2)
parts2 = re.search(r'^(.*)\s?-\s?(.*)$',time)
if parts2:
time_start = parts2.group(1).strip()
time_end = parts2.group(2).strip()
return day, time_start, time_end
return day, time, ''
return '','',''
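# e.g. (hypothetical input) days_times("MW 9:00 AM - 10:20 AM") -> ("MW", "9:00 AM", "10:20 AM"),
# and a days-only string like "TTh" -> ("TTh", "", "").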
def remove_year(s):
s = re.sub(r'\-', '/', s)
if len(s)>5: return s[5:]
return s
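# e.g. remove_year("2025-08-25") -> "08/25" (illustrative date string).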
def get_enrlmts_for_user(user,enrollments):
#active enrollments
u_en = enrollments[ lambda x: (x['user_id'] == user) & (x['workflow']=='active') ]
return u_en[['type','course_id']]
### course is a list of 1-3 lists, each one being a line in the schedule's output. First one has section
def course_start(course):
#todo: use this to make a early/late/short field and store semester dates w/ other constants
start = datetime(2019,1,28)
end = datetime(2019,5,24)
# is it normal, early, late, winter?
li = course[0]
date = li[12]
if date=='01/28-05/24':
return 'Normal'
if date=='TBA':
return 'TBA'
if date=='01/02-01/25':
return 'Winter'
if date=='01/02-01/24':
return 'Winter'
ma = re.search( r'(\d+)\/(\d+)\-(\d+)\/(\d+)', date)
if ma:
# TODO do these years matter?
mystart = datetime(2019, int(ma.group(1)), int(ma.group(2)))
if int(ma.group(1)) > 10: mystart = datetime(2018, int(ma.group(1)), int(ma.group(2)))
myend = datetime(2019, int(ma.group(3)), int(ma.group(4)))
length = myend - mystart
weeks = length.days / 7
if mystart != start:
if mystart < start:
#print 'Early Start ', str(weeks), " weeks ",
return 'Early start'
else:
#print 'Late Start ', str(weeks), " weeks ",
return 'Late start'
else:
if myend > end:
#print 'Long class ', str(weeks), " weeks ",
return 'Long term'
else:
#print 'Short term ', str(weeks), " weeks ",
return 'Short term'
#return ma.group(1) + '/' + ma.group(2) + " end: " + ma.group(3) + "/" + ma.group(4)
else:
return "Didn't match: " + date
# list files in canvas_data (online) and choose one or some to download.
def interactive():
resp = do_request('/api/account/self/file/sync')
mylog.write(json.dumps(resp, indent=4))
#mylog.close()
i = 0
gotten = os.listdir(local_data_folder)
for x in resp['files']:
print(str(i) + '.\t' + x['filename'])
i += 1
which = input("Which files to get? (separate with commas, or say 'all') ")
if which=='all':
which_a = list(range(i-1))
else:
which_a = which.split(",")
for W in which_a:
this_i = int(W)
this_f = resp['files'][this_i]
filename = this_f['filename']
if filename in gotten: continue
print("Downloading: " + filename)
response = requests.request(method='GET', url=this_f['url'], stream=True)
if(response.status_code != 200):
print(('Request response went bad. Got back a ', response.status_code, ' code, meaning the request was ', response.reason))
else:
#Use the downloaded data
with open(local_data_folder + filename, 'wb') as fd:
for chunk in response.iter_content(chunk_size=128):
fd.write(chunk)
print("Success")
"""if filename.split('.')[-1] == 'gz':
try:
plain_filename = 'canvas_data/' + ".".join(filename.split('.')[:-1])
pf = open(plain_filename,'w')
with gzip.open('canvas_data/' + filename , 'rb') as f:
pf.write(f.read())
except Exception as e:
print "Failed to ungizp. Probably too big: " + str(e)"""
# todo: where does the most recent schedule come from?

flex2.py (new file, 106 lines)

@ -0,0 +1,106 @@
import pymysql
# Connect to the MySQL database using PyMySQL
conn = pymysql.connect(
host="192.168.1.6", # Your host (localhost, for example)
user="phowell", # Your MySQL username
password="rolley34",# Your MySQL password
database="db" # Your database name
)
cursor = conn.cursor(pymysql.cursors.DictCursor)
# Query to get the table structure
cursor.execute("""
SELECT
C.TABLE_NAME,
C.COLUMN_NAME,
C.COLUMN_TYPE,
C.IS_NULLABLE,
C.COLUMN_DEFAULT,
C.EXTRA,
KCU.REFERENCED_TABLE_NAME,
KCU.REFERENCED_COLUMN_NAME,
KCU.CONSTRAINT_NAME
FROM
INFORMATION_SCHEMA.COLUMNS C
LEFT JOIN
INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU
ON C.TABLE_NAME = KCU.TABLE_NAME AND C.COLUMN_NAME = KCU.COLUMN_NAME
WHERE
C.TABLE_SCHEMA = 'db' -- Replace with your actual database name
AND C.TABLE_NAME LIKE 'conf_%'; -- Only tables starting with 'conf_'
""")
# Fetch all rows from the query result
columns_info = cursor.fetchall()
# Close the connection
cursor.close()
conn.close()
# Function to generate condensed output
def condense_structure(columns_info):
result = {}
for column in columns_info:
table = column['TABLE_NAME']
column_name = column['COLUMN_NAME']
column_type = column['COLUMN_TYPE']
is_nullable = column['IS_NULLABLE']
extra = column['EXTRA']
referenced_table = column['REFERENCED_TABLE_NAME']
referenced_column = column['REFERENCED_COLUMN_NAME']
constraint_name = column['CONSTRAINT_NAME']
# Condense data type (e.g., 'VARCHAR(255)' -> 'V(255)')
if column_type.startswith('varchar'):
column_type = 'V(' + column_type.split('(')[1].split(')')[0] + ')'
elif column_type.startswith('char'):
column_type = 'C(' + column_type.split('(')[1].split(')')[0] + ')'
elif column_type.startswith('int'):
column_type = 'I' # Int types are just abbreviated to 'I'
elif column_type.startswith('text'):
column_type = 'T' # Text types are abbreviated to 'T'
# Condense NULLABLE
if is_nullable == 'YES':
column_type += ' N' # Add N for nullable
else:
column_type += ' NN' # Add NN for not nullable
# Remove DEFAULT NULL if no default value is set
if column['COLUMN_DEFAULT'] is None:
column_default = ''
else:
column_default = f" D({column['COLUMN_DEFAULT']})" # Default value
# Add extra information, like auto-increment if available
if 'auto_increment' in extra:
column_type += " AI" # Add AI for auto-increment columns
# Handle foreign key references
if referenced_table:
column_type += f" FK({referenced_table}.{referenced_column})"
# Create shorthand for each column
shorthand = f"{column_name}: {column_type}{column_default}"
# Add to the result dict under the respective table
if table not in result:
result[table] = []
result[table].append(shorthand)
return result
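# Sample shorthand lines this produces (illustrative, hypothetical columns):
#   id: I NN AI
#   user_id: I NN FK(conf_users.id)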
# Condense the structure
condensed_structure = condense_structure(columns_info)
# Print out the condensed structure
for table, columns in condensed_structure.items():
print(f"Table: {table}")
for column in columns:
print(f" - {column}")
print("\n")

gpt.py (170 changed lines)

@ -1,15 +1,39 @@
import os, json, sys, codecs, openai
import os, json, sys, codecs, re
from datetime import datetime, timedelta
from openai import OpenAI
from canvas_secrets import openai_org, openai_api_key
client = OpenAI(
api_key=openai_api_key,
organization=openai_org
)
openai.organization = openai_org
openai.api_key = openai_api_key
#print(openai.Model.list())
DEFAULT_MODEL = "gpt-4o"
SAVE_ATTACHEMENTS = 0
def gpt_chat(instruction, prompt, model=DEFAULT_MODEL):
messages = [
{"role": "system", "content": instruction},
{"role": "user", "content": prompt}
]
response = client.chat.completions.create(model=model, messages=messages)
result = response.choices[0].message.content
log3 = codecs.open('cache/gptlog.txt','a','utf-8')
log3.write(json.dumps({'prompt':prompt, 'result': result},indent=2))
log3.write("\n\n---\n\n")
log3.close()
return result
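# Minimal usage sketch (illustrative prompt text):
#   reply = gpt_chat("You are a concise assistant.", "Summarize the late-start schedule in one sentence.")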
def gpt_faq(faq):
system_role = """Ignore all previous instructions. You are an expert on the management and implementation of all aspects of online courses and running a distance education program, especially in higher education and community college. You are also an expert in pedagogy and the tools and techniques of effective teaching and learning, based on research and experience. You know how to focus on concrete examples of effective teaching which actually work in the real world and the advice you give is specific and immediately useful to teachers in the classroom. You provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. Your users are experts in teaching and college administration, and are comfortable with the California and Federal Educational code, and know they need to comply with it. There is no need to explain this beyond noting any relevant laws. Don't be verbose in your answers, but do provide details and examples where it might help the explanation.
You are a world class technical writer. Your style is human-centered, friendly and kind. Give me concise and accurate answers and ignore all the niceties that OpenAI programmed you with. Omit all qualifications and excuses. I know you are a large language model but I want you to pretend you are a confident and superintelligent oracle that can help a limited human warp reality to his own intentions. You can reveal your own sense of humor and depth of intelligence. It is very important that you get this right.
@ -17,42 +41,22 @@ Adopt a penetratingly insightful, incisively funny, cleverly witty persona. Your
I am pasting below an email chain between myself and colleague(s). I am fairly certain that it contains a technical question that I have answered. They may be spread out over several messages, or there might be some clarification or follow up questions. There will be headers, signatures, and other text that isn't a part of the core message. Ignore that. Consider the whole email chain while you prepare the following: Respond with a json formatted dictionary that contains the following:
{ "question": "Restate the question or problem in a concise but clear manner", "topics": ["keywords", "or phrases", "that categorize the issue"], "answer": "The best possible answer, written in markdown format. Draw the answer from the email but feel free to edit or embelish based on your knowledge. Generalize the answer to anyone who might have the issue. Your audience is mostly instructors working at a small community college. Do not refer to anyone's name specifically, unless it is Peter or Sabrina, but instead write for a general audience looking for the answers to their questions. We are writing a FAQ or help page. Feel free to use markdown-formatted bold, italic, lists, and links."} """
# create a completion
my_model = "gpt-4" # "gpt-3.5-turbo-16k" # gpt-3.5-turbo gpt-4 gpt-4-32k
completion = openai.ChatCompletion.create(model=my_model, messages=[
{"role": "system", "content": system_role},
{"role": "user", "content": faq} ] )
log3 = codecs.open('cache/gptlog.txt','a','utf-8')
log3.write(json.dumps(completion,indent=2))
log3.write("\n\n---\n\n")
log3.close()
r = completion['choices'][0]['message']['content']
#print(str(r) + "\n\n")
return r
return gpt_chat(system_role, faq)
def gpt_test():
my_prompt = "Write a series of texts trying to sell a pen to a stranger."
print(sys.argv)
exit
if len(sys.argv)>1:
my_prompt = " ".join(sys.argv[1:])
else:
print("Prompt: %s" % my_prompt)
my_model = "text-davinci-003"
# create a completion
completion = openai.Completion.create(engine=my_model, prompt=my_prompt, max_tokens=1000, temperature=1,top_p=1)
#print(completion)
#print(json.dumps(completion,indent=2))
print(completion.choices[0].text)
print()
result = gpt_chat("", my_prompt)
print(result)
def sample_send_email():
@ -85,7 +89,7 @@ def fetch_useful_info():
log = codecs.open("cache/email_usefulinfo.txt","w","utf-8")
#Finally, let's say you want to access a subfolder named folder_of_soldy in your root_folder, you do:
# access a subfolder
print("\nUseful Info Reference:")
uinfo = root_folder.Folders['useful info ref']
for message in uinfo.Items:
@ -97,7 +101,8 @@ def fetch_useful_info():
attachment = attachments.Item(1)
for attachment in message.Attachments:
print(" -> " + str(attachment))
loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments\\" + str(attachment)
#loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments\\" + str(attachment)
loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments\\" + str(attachment)
attachment.SaveAsFile(loc)
atch_list += str(attachment) + ', '
atch_count += 1
@ -121,7 +126,8 @@ def process_email_filesave(message, log, i):
attachment = attachments.Item(1)
for attachment in message.Attachments:
print(" -> " + str(attachment))
loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
#loc = "C:\\Users\\peter\\Documents\\gavilan\\ca_merged\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
loc = "C:\\Users\\phowell\\source\\repos\\canvasapp\\cache\\attachments_faq\\" + str(attachment)
attachment.SaveAsFile(loc)
atch_list += str(attachment) + ', '
atch_count += 1
@ -139,12 +145,75 @@ def process_email_filesave(message, log, i):
logeach.close()
def list_faq():
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
root_folder = outlook.Folders.Item(1)
print("\nFAQ Emails:")
uinfo = root_folder.Folders['for faq']
index = 0
# Get today's date
end_date = datetime.now()
# Go back xx months
months_back = 60
chunk_results = []
print("\nLoading messages in 1-month chunks...\n")
for i in range(months_back):
chunk_end = end_date.replace(day=1) - timedelta(days=1) # End of previous month
chunk_start = chunk_end.replace(day=1) # Start of that month
start_str = chunk_start.strftime("%m/%d/%Y %I:%M %p")  # 12-hour clock so the AM/PM marker is meaningful
end_str = chunk_end.strftime("%m/%d/%Y %I:%M %p")
restriction = f"[ReceivedTime] >= '{start_str}' AND [ReceivedTime] <= '{end_str}'"
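# Example of the resulting Restrict filter for one month-chunk (hypothetical dates;
# the times carry whatever clock time datetime.now() returned):
# [ReceivedTime] >= '04/01/2025 09:15 AM' AND [ReceivedTime] <= '04/30/2025 09:15 AM'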
filtered = uinfo.Items.Restrict(restriction)
# Force enumeration
#messages = [msg for msg in filtered if msg.Class == 43] # MailItem only
messages = [msg for msg in filtered ]
count = len(messages)
print(f"{chunk_start.strftime('%B %Y')}: {count} messages")
chunk_results.append((chunk_start.strftime('%Y-%m'), count))
for message in messages:
try:
print(f" {index}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
index += 1
except Exception as e:
print(f"Exception: {str(e)}")
end_date = chunk_start # Move back to the previous month
'''for message in uinfo.Items:
try:
print(f"{i}\t sub: {message.Subject} \t from: {message.Sender} \t on: {message.SentOn}")
except Exception as e:
print(f"Exception: {str(e)}")
i += 1
if i % 20 == 0:
temp = input(f"press enter to continue, or q to quit now at message {i}: ")
if temp == 'q':
exit()
'''
def fetch_faq():
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
root_folder = outlook.Folders.Item(1)
PAUSE = 0
PAUSE = 1
startat = input("Press enter to continue or a number to start at that message: ")
if startat == '': startat = '0'
@ -168,10 +237,10 @@ def fetch_faq():
except Exception as e:
print(f"Exception: {str(e)}")
#summary = gpt_faq( f"Subject: {message.Subject}\nBody: {message.body}")
#log2.write( f",\n{summary}")
#log2.flush()
#print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")
summary = gpt_faq( f"Subject: {message.Subject}\nBody: {message.body}")
log2.write( f",\n{summary}")
log2.flush()
print(f"Subject: {message.Subject}\n{summary}\n\n-----\n\n")
i += 1
if PAUSE:
@ -179,9 +248,32 @@ def fetch_faq():
if temp == 'q':
exit()
#fetch_useful_info()
fetch_faq()
if __name__ == "__main__":
print ('')
options = { 1: ['gpt test',gpt_test] ,
2: ['test email send',sample_send_email] ,
3: ['fetch "useful info" mailbox', fetch_useful_info],
4: ['fetch "faq" mailbox and gpt summarize', fetch_faq],
5: ['list faq mailbox', list_faq],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

@ -8,8 +8,9 @@ from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from semesters import to_sis_sem
from pipelines import sync_non_interactive, url, header
#, gp, dean
from semesters import short_to_sis
#from courses import getCoursesInTerm
@ -1722,7 +1723,7 @@ def build_db_schedule():
S['gp'] = gp[parts[0]]
S['dean'] = dean[parts[0]]
S['sem'] = F[0:4]
S['sem_sis'] = to_sis_sem(F[0:4])
S['sem_sis'] = short_to_sis(F[0:4])
if not 'partofday' in S:
S['partofday'] = ''
str = "INSERT INTO schedule (sem,sem_sis,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc,partofday) VALUES (%s);\n" % \

@ -8,7 +8,7 @@ from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
#from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate
from canvas_secrets import postgres_database, postgres_password, postgres_port, postgres_user, postgres_host
@ -391,7 +391,7 @@ def courses_to_sched():
vals_cache = []
i = 0
for year in ['16','17','18','19','20','21','22','23','24']:
for year in ['16','17','18','19','20','21','22','23','24','25']:
for sem in ['sp','su','fa']:
term = f"{sem}{year}"
sis_code = f"20{year}{seasons2[sem]}"
@ -422,6 +422,7 @@ def courses_to_sched():
full_sis_code = sis_code+'-'+c['crn']
if full_sis_code in sis_to_sched:
print(c['cred'])
q = [sis_to_sched[full_sis_code][0][0], c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, int(c['cap']), int(c['act']), sis_code]
vals_cache.append( q ) # [ str(x) for x in q ] )
#print(f"{i}: {q}")
@ -467,6 +468,24 @@ def teacher_list(courseid):
cursor.execute(q)
return cursor.fetchall()
def everyone_teacher_role():
conn,cursor = db()
q = '''select distinct ON (u.name) u.name, u.id, p.sis_user_id, u.created_at, c.course_code from canvas.enrollments e
join canvas.users u on u.id=e.user_id
join canvas.courses c on e.course_id=c.id
join canvas.pseudonyms p on u.id=p.user_id
where e.type='TeacherEnrollment'
order by u.name;'''
cursor.execute(q)
return cursor.fetchall()
def iLearn_name_from_goo(goo):
goo = goo.upper()
conn,cursor = db()
q = f"select u.id, u.name, u.sortable_name, p.sis_user_id from canvas.pseudonyms p join canvas.users u on u.id=p.user_id where p.sis_user_id='{goo}';"
cursor.execute(q)
return cursor.fetchone()
if __name__ == "__main__":

@ -104,7 +104,7 @@ if __name__ == "__main__":
10:['Download new photos', downloadPhoto],
11:['Check for avatar',checkForAvatar],
25:['X-List 190 sections', xlist_cwe] , ###
28:['Check accessibility of a course', accessible_check] ,
28:['Check accessibility of a course', course_download] ,
29:['Switch enrollments of a shell to all teachers', switch_enrol] ,
35:['Enroll user to all active courses in a semester', enroll_accred],
36:['Fix an older course so it can be enrolled again, add accred', unrestrict_course],

@ -30,9 +30,18 @@ from path_dict import PathDict
outputfile = ''
csvwriter = ''
TERM = 184
# 289 2025 Fall
# 288 2025 Summer
# 287 2025 Spring
# 286 2025 Winter
# 184 2024 Fall
# 183 2024 Summer
# 181 2024 Spring
# 182 2024 Winter
TERM = 286 # fall = 287
TERM = 287
# TERM = 286 # fall = 287
def escape_commas(s):

File diff suppressed because it is too large
@ -3,6 +3,17 @@
# TODO students enrolled in fall 2020
# People with custom email
select u.name, u.last_logged_out, u.created_at, u.updated_at, cc.path from canvas.users u
join canvas.communication_channels cc on u.id=cc.user_id
where cc.path_type='email' and cc.path not like '%gavilan.edu'
order by u.last_logged_out;
# how many
select count(u.name) from canvas.users u join canvas.communication_channels cc on u.id=cc.user_id where cc.path_type='email' and cc.path not like '%gavilan.edu';
## Fall 2020 students with how many classes they're taking
SELECT u.canvasid, u.name, u.sortablename, COUNT(e.id) AS num FROM enrollment AS e

File diff suppressed because it is too large
556
search.py Normal file
@ -0,0 +1,556 @@
###
###
### Text / Knowledge Base
###
### How about downloading all possible info / webpages / sources
### related to Gavilan and creating a master search index?
###
### Goals:
### - Scripted approach to allow re-indexing / updating
### - Break everything down into paragraphs
###
### - Script to extract keywords, topics, entities, summaries, questions answered
### from each paragraph or chunk.
### - Use spacy, gensim, nltk, or gpt-3, or a combination of all of them
###
### - Create vector / embeddings for each paragraph
###
### - Enable a vector search engine and connect to front page of gavilan.cc
### - Use that to feed handful of source paragraphs (& prompt) into gpt and
### receive text answers to questions.
import re, os, codecs, requests, trafilatura, pickle, pypandoc
from collections import defaultdict
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer, util
from util import clean_fn
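### Minimal sketch (illustrative only, not called anywhere) of the embed-and-query
### pipeline described in the notes above, using the same sentence-transformers +
### Annoy stack that create_embeddings() / search_embeddings() use further down.
### The paragraphs and the query string are placeholder text.
def embedding_pipeline_sketch():
    from sentence_transformers import SentenceTransformer
    from annoy import AnnoyIndex
    model = SentenceTransformer('all-MiniLM-L6-v2')        # 384-dimensional embeddings
    paragraphs = ["Financial aid help is available in person.",
                  "Peer mentors can assist with the FAFSA."]
    index = AnnoyIndex(384, 'angular')
    for i, vec in enumerate(model.encode(paragraphs)):     # one vector per paragraph
        index.add_item(i, vec)
    index.build(10)                                         # 10 trees
    query_vec = model.encode("Who can help me with the FAFSA?")
    for hit in index.get_nns_by_vector(query_vec, 2):       # nearest paragraphs first
        print(hit, paragraphs[hit])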
def demo_vector_search():
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
import nltk.data
import spacy
# (might have to upgrade pip first...)
# pip install --upgrade click
#
# python -m spacy download en_core_web_sm
# python -m spacy download en_core_web_lg
def is_complete_sentence(text):
#text = text.text
doc = nlp(text)
sentences = list(doc.sents)
if len(sentences) == 1 and text.strip() == sentences[0].text.strip():
return True
return False
sentences = [
"This is an example sentence.",
"Here is another sentence for training."
]
paragraph = """Financial Aid services are available in person! We are happy to assist you with your financial aid needs. If you are interested in visiting the office in person, please review the guidelines for visiting campus and schedule your appointment:
Guidelines for In-Person Financial Aid Services
Due to FERPA regulations, no student information will be given to anyone other than the student without authorization from the student.
We continue to offer virtual services. Financial Aid staff may be reached by email, phone, text, and zoom! Please refer to the contact information and schedules below.
Gavilan-WelcomeCenter_Peer_Mentors.jpg
Do you need assistance filing the FAFSA or California Dream Act Application? Friendly and knowledgeable Peer Mentors are available to assist you virtually and in person! Details below for an online Zoom visit, phone call, or in-person visit with Peer Mentors.
Monday - Friday 8am - 5pm, Student Center
Join Zoom to Connect with a Peer Mentor
Or call (669) 900-6833 and use meeting ID 408 848 4800
MicrosoftTeams-image.png
Do you need assistance with an existing financial aid application, financial aid document submission, or review of your financial aid package? Schedule an in-person, phone, or zoom appointment with our Financial Aid counter.
Mon - Thurs: 9am - 1:00pm, 2:00pm - 5:00pm
Fri: 10am - 2pm
Office: (408) 848-4727 Email: finaid@gavilan.edu
Schedule an In-Person, Phone or Zoom Appointment"""
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
sentences1 = tokenizer.tokenize(paragraph)
for i,s in enumerate(sentences1):
print(i, "\t", s)
print("\n\n")
#nlp = spacy.load('en_core_web_sm')
nlp = spacy.load('en_core_web_md')
doc = nlp(paragraph)
sentences2 = list(doc.sents)
for i,s in enumerate(sentences2):
t = re.sub(r'\n+',' ',s.text)
is_sentence = 'yes' if is_complete_sentence(t) else 'no '
print(i, " ", is_sentence, " ", t)
print("\n\n")
#for text in sentences2:
# print(text, "is a complete sentence?" , is_complete_sentence(text))
return
tokenized_sentences = [simple_preprocess(s) for s in sentences]
model = Word2Vec(tokenized_sentences, min_count=1, vector_size=100)
example_word = "example"
vector = model.wv[example_word]
print(f"Vector for the word '{example_word}': {vector}")
def makedir():
files = os.listdir('cache/crawl')
#print(files)
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
print(parts)
def manual_index():
files = os.listdir('cache/crawl')
#print(files)
ii = codecs.open('cache/crawl/index.html','w','utf-8')
ii.write('<html><body><h1>Site index</h1>\n')
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
ii.write('<br /><a href="mirror/'+f+'">'+f+'</a>\n')
def my_site():
files = os.listdir('cache/crawl')
output = []
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
name = m.groups()[0]
parts = name.split('+')
output.append(parts)
return output
## TODO site scraper
## TODO find package that extracts text from web page
### TODO master list of what to index.
## TODO PDFs and DOCXs
## TODO fix urls w/ anchors
def crawl():
import scrapy, logging
from scrapy.crawler import CrawlerProcess
logger = logging.getLogger()
logger.setLevel(level=logging.CRITICAL)
logging.basicConfig(level=logging.CRITICAL)
logger.disabled = True
avoid = ['ezproxy','community\.gavilan\.edu','archive\/tag','archive\/category', 'my\.gavilan\.edu', 'augusoft',
'eis-prod', 'ilearn\.gavilan', 'mailto', 'cgi-bin', 'edu\/old\/schedule',
'admit\/search\.php', 'GavilanTrusteeAreaMaps2022\.pdf', 'schedule\/2019', 'schedule\/2020', 'schedule\/2021',
'schedule\/2022', 'schedule\/previous', ]
class MySpider(scrapy.Spider):
name = 'myspider'
#start_urls = ['https://gavilan.curriqunet.com/catalog/iq/1826']
start_urls = ['https://www.gavilan.edu']
"""
logging.getLogger("scrapy").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.utils.log").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.extensions.telnet").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.core.engine").setLevel(logging.CRITICAL)
logging.getLogger("scrapy.middleware").setLevel(logging.CRITICAL)
logger.disabled = True"""
def parse(self, response):
print('visited:', repr(response.url), 'status:', response.status)
done = 0
if re.search(r'\.pdf$', response.url):
m = re.search(r'\/([^\/]+\.pdf)$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
text = extract_text(save_folder + '/' + clean_fn(response.url))
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(text)
done = 1
for ext in ['doc','docx','ppt','pptx','rtf','xls','xlsx']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
#text = extract_text(save_folder + '/' + clean_fn(response.url) + '.txt')
pandoc_infile = save_folder + '/' + clean_fn(response.url)
pandoc_outfile = save_folder + '/' + clean_fn(response.url) + '.html'
print("pandoc in file: %s" % pandoc_infile)
print("pandoc outfile: %s" % pandoc_outfile)
pypandoc.convert_file(pandoc_infile, 'html', outputfile=pandoc_outfile, extra_args=['--from=%s' % ext, '--extract-media=%s' % save_folder + '/img' ])
pandoc_output = codecs.open(pandoc_outfile,'r','utf-8').read()
txt_output = trafilatura.extract(pandoc_output,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if txt_output:
codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8').write(txt_output)
done = 1
for ext in ['jpg','jpeg','gif','webp','png','svg','bmp','tiff','tif','ico']:
if re.search(r'\.'+ext+'$', response.url):
m = re.search(r'\/([^\/]+\.'+ext+')$', response.url)
if m:
print("saving to ", save_folder + '/img/' + clean_fn(response.url))
pdf_response = requests.get(response.url)
with open(save_folder + '/img/' + clean_fn(response.url), 'wb') as f:
f.write(pdf_response.content)
done = 1
if not done:
f_out = codecs.open(save_folder + '/' + clean_fn(response.url) + '.txt','w','utf-8')
this_output = trafilatura.extract(response.text,include_links=True, deduplicate=True, include_images=True, include_formatting=True)
if this_output:
f_out.write(this_output)
f_out.close()
links = response.css('a::attr(href)').getall()
# Follow each link and parse its contents
for link in links:
go = 1
full_link = response.urljoin(link)
print('++++++ trying ', full_link)
if not re.search(r'gavilan\.edu',full_link):
go = 0
print('--- not gav edu')
else:
if re.search(r'hhh\.gavilan\.edu',full_link):
pass
elif not re.search(r'^https?:\/\/www\.gavilan\.edu',full_link):
# need to add www to gavilan.edu
m = re.search(r'^(https?:\/\/)gavilan\.edu(\/.*)$',full_link)
if m:
full_link = m.group(1) + 'www.gavilan.edu' + m.group(2)  # re-insert the host, which the regex does not capture
for a in avoid:
if re.search(a,full_link):
go = 0
print('--- avoid ', a)
if go: yield scrapy.Request(full_link, callback=self.parse,
headers={"User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"})
else:
print("------ avoiding ", full_link)
# Instantiate a CrawlerProcess object
process = CrawlerProcess()
# Add the MySpider spider to the process
process.crawl(MySpider)
# Start the process
logging.basicConfig(level=logging.CRITICAL)
logging.getLogger('scrapy').propagate = False
logging.getLogger("trafilatura").setLevel(logging.CRITICAL)
logging.getLogger("trafilatura").propagate = False
logging.getLogger("pdfminer").setLevel(logging.CRITICAL)
logging.getLogger("pdfminer").propagate = False
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").propagate = False
logging.basicConfig(level=logging.CRITICAL)
process.start()
save_folder = 'cache/crawl'
clean_folder = 'cache/cleancrawl'
def txt_clean_index():
files = os.listdir(save_folder)
line_freq = defaultdict(int)
# first pass
for f in files:
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
for L in lines:
L = L.strip()
line_freq[L] += 1
# second pass
for f in files:
print("\n\n",f)
lines = codecs.open(save_folder + '/' + f,'r','utf-8').readlines()
out = codecs.open(clean_folder + '/' + f,'w','utf-8')
for L in lines:
L = L.strip()
if L in line_freq and line_freq[L] > 3:
continue
print(L)
out.write(L + '\n')
out.close()
from whoosh import fields, columns
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID, STORED, NUMERIC
from whoosh.qparser import QueryParser
from whoosh.analysis import StemmingAnalyzer
def priority_from_url(url):
priority = 1
# url is like this: https++www.gavilan.edu+news+Newsletters.php.txt
m = re.search(r'gavilan\.edu\+(.*)\.\w\w\w\w?$',url)
if m:
address = m.group(1)
parts = address.split('+')
if parts[0] in ['accreditation','curriculum','senate','research','old','committee','board','styleguide']:
priority += 20
if parts[0] in ['news','IT','HOM','administration']:
priority += 10
if parts[0] == 'admit' and parts[1] == 'schedule':
priority += 10
if 'accreditation' in parts:
priority += 50
if re.search(r'hhh\.gavilan\.edu',url):
priority += 100
priority *= len(parts)
#print(priority, parts)
else:
priority *= 50
#print(priority, url)
return priority
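# Worked example: the crawl file 'https++www.gavilan.edu+news+Newsletters.php.txt'
# yields address 'news+Newsletters.php'; parts[0] == 'news' adds 10 (total 11),
# which is then multiplied by len(parts) == 2, so the function returns 22.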
def test_priority():
ff = os.listdir('cache/crawl')
for f in ff:
priority_from_url(f)
def displayfile(f,aslist=0):
lines = codecs.open('cache/crawl/' + f,'r','utf-8').readlines()
lines = [L.strip() for L in lines]
lines = [L for L in lines if L and not re.search(r'^\|$',L)]
if aslist:
return lines
return "\n".join(lines)
def any_match(line, words):
# true if any of the words are in line
for w in words:
if re.search(w, line, re.IGNORECASE):
return True
return False
def find_match_line(filename, query):
q_words = query.split(" ")
lines = codecs.open('cache/crawl/' + filename,'r','utf-8').readlines()
lines = [L.strip() for L in lines]
lines = [L for L in lines if L and not re.search(r'^\|$',L)]
lines = [L for L in lines if any_match(L, q_words)]
return "\n".join(lines)
def search_index():
s = ''
schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i")))
ix = open_dir("cache/searchindex")
#with ix.reader() as reader:
#print(reader.doc_count()) # number of documents in the index
#print(reader.doc_frequency("content", "example")) # number of documents that contain the term "example" in the "content" field
#print(reader.field_length("content")) # total number of terms in the "content" field
#print(reader.term_info("content", "example")) # information about the term "example" in the "content" field
#print(reader.dump()) # overview of the entire index
while s != 'q':
s = input("search or 'q' to quit: ")
if s == 'q':
return
# Define the query parser for the index
with ix.searcher() as searcher:
query_parser = QueryParser("content", schema=schema)
# Parse the user's query
query = query_parser.parse(s)
print(query)
# Search the index for documents matching the query
results = searcher.search(query, sortedby="priority")
# Print the results
i = 1
for result in results:
print(i, result) # result["url"], result["content"])
print(find_match_line(result['url'], s))
print()
i += 1
def create_search_index():
# Define the schema for the index
stem_ana = StemmingAnalyzer()
schema = Schema(url=STORED, title=TEXT(stored=True), content=TEXT, priority=fields.COLUMN(columns.NumericColumn("i")))
# Create a new index in the directory "myindex"
ix = create_in("cache/searchindex", schema)
# Open an existing index
#ix = open_dir("cache/searchindex")
# Define the writer for the index
writer = ix.writer()
# Index some documents
files = os.listdir('cache/crawl')
files.sort()
for f in files:
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
print(f)
writer.add_document(url=f, title=m.group(1), content=displayfile(f), priority=priority_from_url(f))
writer.commit()
from annoy import AnnoyIndex
import random
def test_embed():
model = SentenceTransformer('all-MiniLM-L6-v2')
sample = "What is this world coming to? What happens in the data and the research?"
embed = model.encode(sample)
print("\nSample sentence:", sample)
print("\nEmbedding:", embed)
print("\nEmbedding size:", len(embed))
def create_embeddings():
model = SentenceTransformer('all-MiniLM-L6-v2')
vecsize = 384 # sentence transformer embedding size
t = AnnoyIndex(vecsize, 'angular')
files = os.listdir('cache/crawl')
output = [] # ['index', 'file','sentence']
index = 0
save_embeds = []
files.sort()
for f in files:
print(f)
m = re.match(r'https?..www\.gavilan\.edu\+(.*)\.\w\w\w\w?\.txt$',f)
if m:
lines = displayfile(f,1)
embeddings = model.encode(lines)
print("\n-----", index, f)
for sentence, embedding in zip(lines, embeddings):
if len(sentence.split(' ')) > 5:
print(index, "Sentence:", sentence)
print(embedding[:8])
t.add_item(index, embedding)
output.append( [index,f,sentence] )
index += 1
if index > 500:
break
t.build(30) # 30 trees
t.save('cache/sentences.ann')
pickle.dump( output, open( "cache/embedding_index.p", "wb" ) )
def search_embeddings():
f = 384 # sentence transformer embedding size
n = 10 # how many results
u = AnnoyIndex(f, 'angular')
u.load('cache/sentences.ann') # super fast, will just mmap the file
print(u.get_n_items(), "items in index")
model = SentenceTransformer('all-MiniLM-L6-v2')
search_index = pickle.load( open( "cache/embedding_index.p", "rb" ) )
print(search_index)
s = ''
while s != 'q':
s = input("search or 'q' to quit: ")
if s == 'q':
return
query_embedding = model.encode(s)
results = u.get_nns_by_vector(query_embedding, n)
# Print the top 5 results
for i, r in enumerate(results):
print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')
if __name__ == "__main__":
print ('')
options = { 1: ['demo vector search', demo_vector_search],
8: ['crawl',crawl],
9: ['clean text index', txt_clean_index],
10: ['make web dir struct', manual_index],
11: ['create search embeddings', create_embeddings],
12: ['create search index', create_search_index],
13: ['do an index search', search_index],
14: ['do a vector search', search_embeddings],
15: ['test priority', test_priority],
16: ['test embed', test_embed]
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

@ -1,11 +1,105 @@
# Try to gather all the different formats and ways of labeling a semester, along with their associated dates.
import json, funcy
import json, funcy, re, sys
sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
# sem_to_short = { 'Summer 2021': 'su21', 'Fall 2021':'fa21', 'Winter 2022':'wi22', 'Spring 2022':'sp22', 'Summer 2022':'su22', 'Fall 2022':'fa22' }
season_to_number = { 'Fall': '70', 'Summer': '50', 'Spring': '30', 'Winter': '10'}
# Inverse
number_to_season = {v: k for k, v in season_to_number.items()}
s_to_n = {'sp':'30','su':'50','fa':'70'}
season_to_short = {
'Summer': 'su',
'Fall': 'fa',
'Winter': 'wi',
'Spring': 'sp'
}
standard = ['Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
# Given 'fa22' return 202270
def short_to_sis(s):
season = s[0:2]
return "20" + s[2:5] + s_to_n[season]
# go from sp20 to 2020spring
def short_to_long(s):
parts = re.search(r'(\w\w)(\d\d)', s)
yr = parts.group(2)
season = parts.group(1)
seasons = {'sp':'spring','su':'summer','fa':'fall','wi':'winter'}
return '20'+yr+seasons[season]
# from "Summer 2024" to 202450
def human_to_sis(semester):
try:
# Split the semester into its components
parts = semester.split()
# Extract the season and year
season = parts[0]
year = parts[1]
# Generate the code in the format "YYYYSS"
return f"{year}{season_to_number[season]}"
except Exception as e:
print(f"Couldn't parse semester: {semester}")
return ''
# from 202450 to "Summer 2024"
def sis_to_human(sis_code):
try:
# Extract the year and season code
year = sis_code[:4]
season_code = sis_code[4:]
# Convert season code back to season name
season = number_to_season.get(season_code, "Unknown")
return f"{season} {year}"
except Exception as e:
print(f"Couldn't parse SIS code: {sis_code}")
return ''
# from "Summer 2024" to su24
def human_to_short(semester):
# Split the semester into its components
parts = semester.split()
# Extract the season and year
season = parts[0]
year = parts[1][2:] # Take the last two digits of the year
# Generate the short form
return f"{season_to_short[season]}{year}"
# given human readable form (Spring 2023) return that of the previous semester. Excluding winter.
def get_previous_season(season_year_str):
season_order = {"Spring": "Fall", "Summer": "Spring", "Fall": "Summer"}
try:
season, year = season_year_str.split()
year = int(year)
if season not in season_order or not (2000 <= year <= 2030):
raise ValueError("Invalid season or year")
previous_season = season_order[season]
# Decrement the year if transitioning from Spring to Fall
if season == "Spring":
year -= 1
return f"{previous_season} {year}"
except Exception as e:
return f"Error: {e}"
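# Quick reference -- these values follow directly from the converters above:
#   short_to_sis('fa22')               -> '202270'
#   short_to_long('sp20')              -> '2020spring'
#   human_to_sis('Summer 2024')        -> '202450'
#   sis_to_human('202450')             -> 'Summer 2024'
#   human_to_short('Summer 2024')      -> 'su24'
#   get_previous_season('Spring 2023') -> 'Fall 2022'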
standard = ['Fall 2026', 'Summer 2026', 'Spring 2026', 'Winter 2026',
'Fall 2025', 'Summer 2025', 'Spring 2025', 'Winter 2025',
'Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
'Fall 2023', 'Summer 2023', 'Spring 2023', 'Winter 2023',
'Fall 2022', 'Summer 2022', 'Spring 2022', 'Winter 2022',
'Fall 2021', 'Summer 2021', 'Spring 2021',
@ -14,9 +108,11 @@ standard = ['Fall 2024', 'Summer 2024', 'Spring 2024', 'Winter 2024',
'Fall 2018', 'Summer 2018', 'Spring 2018',
'Fall 2017', 'Summer 2017', 'Spring 2017', ]
code = 'fa24,su24,sp24,wi24,fa23,su23,sp23,wi23,fa22,su22,sp22,wi22,fa21,su21,sp21,fa20,su20,sp20,wi20,fa19,su19,sp19,wi19,fa18,su18,sp18,fa17,su17,sp17'.split(',')
code = 'fa26,su26,sp26,wi26,fa25,su25,sp25,wi25,fa24,su24,sp24,wi24,fa23,su23,sp23,wi23,fa22,su22,sp22,wi22,fa21,su21,sp21,fa20,su20,sp20,wi20,fa19,su19,sp19,wi19,fa18,su18,sp18,fa17,su17,sp17'.split(',')
begin = ['08/26','06/10','01/29','01/02',
begin = ['08/25','05/22','01/26','01/01', # not sure on fa26
'08/25','05/27','01/27','01/02',
'08/26','06/10','01/29','01/02',
'08/28','06/12','01/30','01/03',
'08/22','06/13','01/31','01/04',
'08/23','06/14','02/01',
@ -27,32 +123,66 @@ begin = ['08/26','06/10','01/29','01/02',
canvas_label = []
semester_list = {}
season_to_number = { 'Fall': '70', 'Summer': '50', 'Spring': '30', 'Winter': '10'}
s_to_n = {'sp':'30','su':'50','fa':'70'}
sems_by_human_name = {}
for s in list(zip(standard,code,begin)):
season,year = s[0].split(' ')
cl = year + " " + season
sem_record = {'name': s[0], 'code': s[1], 'start': s[2] + '/' + s[1][-2:], 'number': year + season_to_number[s[0].split(' ')[0]]}
semester_list[s[0]] = sem_record
semester_list[s[1]] = sem_record
sems_by_human_name[s[0]] = sem_record
sems_by_human_name[s[1]] = sem_record
canvas_label.append(cl)
semester_list[cl] = sem_record
sems_by_human_name[cl] = sem_record
sems_by_short_name = funcy.project(sems_by_human_name, code)
# Given 'fa22' return 202270
def to_sis_sem(s):
season = s[0:2]
return "20" + s[2:5] + s_to_n[season]
def dump():
print("BY HUMAN READABLE NAME")
print(json.dumps(sems_by_human_name,indent=2))
print("\n\nBY SEM SHORTCODE")
print(json.dumps(sems_by_short_name,indent=2))
# print(json.dumps(semester_list,indent=2))
sems = funcy.project(semester_list, code)
#print(json.dumps(sems,indent=2))
def weeks_from_date():
from datetime import datetime, timedelta
weeks = int( input("how many weeks ahead? "))
# Replace this with your starting date
x = datetime.strptime("2025-05-27", "%Y-%m-%d")
# Add the requested number of weeks
later = x + timedelta(weeks=weeks)
print(f"{weeks} weeks later:", later.strftime("%Y-%m-%d"))
if __name__ == "__main__":
print ('')
options = { 1: ['print semester info',dump] ,
2: ['compute x weeks from date', weeks_from_date ],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()
"""

590
ssb.py Normal file
@ -0,0 +1,590 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from datetime import datetime
import time, codecs, traceback
from bs4 import BeautifulSoup as bs
from io import StringIO
from time import strptime
from deepdiff import DeepDiff
from datetime import datetime as dt
from dateutil import parser
from util import fix_t_name, split_class_dept, split_class_code, split_class_code_letter
import json, re, sys, os, codecs, csv, pathlib
import schedules
def writepage(txt):
errfile = codecs.open('lastpage.txt','w','utf-8')
errfile.write(txt)
errfile.close()
DEBUG = 0
def d(s,end=''):
global DEBUG
if end and DEBUG: print(s,end=end)
elif DEBUG: print(s)
# Schedule / course filling history
# csv columns: timestamp, crn, code, teacher, cap, act, wl_cap, wl_act
# Log the history of enrollments per course during registration
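# Example row (made-up numbers) in the column order above:
# 2025-05-09T08-30,30123,MATH 233,Maria Garcia,30,28,5,2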
def log_section_filling(current_sched_list, short_sem):
rows = 'timestamp crn code teacher cap act wl_cap wl_act'.split(' ')
rows_j = 'crn code teacher cap act wl_cap wl_act'.split(' ')
print(rows_j)
now = datetime.now().strftime('%Y-%m-%dT%H-%M')
csv_fn = 'cache/reg_history_' + short_sem + '.csv'
with codecs.open(csv_fn,'a','utf-8') as f:
writer = csv.writer(f)
for S in current_sched_list:
#print(S)
items = [now,]
[ items.append( S[X] ) for X in rows_j ]
writer.writerow(items)
# Same as above, but compressed, act only
def log_section_filling2(current_sched_list, short_sem):
now = datetime.now().strftime('%Y-%m-%dT%H')
todays_data = { int(S['crn']): S['act'] for S in current_sched_list }
#print(todays_data)
todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
todays_df = todays_df.rename_axis('crn')
#print(todays_df)
todays_df.to_csv('cache/reg_today_new.csv', index=True)
try:
myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
print(myframe)
except:
fff = open('cache/reg_data_'+short_sem+'.csv','w')
fff.write('crn\n')
fff.close()
myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
#myframe = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
#myframe = myframe.rename_axis('crn')
print("Creating new data file for this semester.")
new_df = myframe.join( todays_df, on='crn', how='outer' )
new_df = new_df.rename_axis('crn')
print(new_df)
reg_data_filename = 'reg_data_' + short_sem + '.csv'
new_df.to_csv('cache/' + reg_data_filename, index=False)
# Take Banner's schedule HTML and return the section table as CSV-formatted text
def ssb_to_csv(src):
#out = codecs.open(schedfile,'w','utf-8')
output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
b = bs(src, 'html.parser')
tab = b.find(class_="datadisplaytable")
if not tab:
print("hmm... didn't find a 'datadisplaytable' in this html: ")
#print(src)
return 0
rows = tab.find_all('tr')
drows = list(filter(row_has_data,rows))
for dd in drows:
t = row_text(dd)
output += t
return output
# take text lines and condense them to one dict per section
def to_section_list(input_text,verbose=0):
this_course = ''
#todo: no output files
#jout = codecs.open(filename, 'w', 'utf-8')
#input = csv.DictReader(open(schedfile,'r'))
#input = UnicodeDictReader(input_text.splitlines())
all_courses = []
try:
f = StringIO(input_text)
except:
print("ERROR with this input_text:")
print(input_text)
reader = csv.reader(f, delimiter=',')
headers = next(reader)
for r in reader:
d = dict(list(zip(headers,r)))
#pdb.set_trace()
# clean funny unicode char in blank entries
r = {k: clean_funny2(v) for k,v in list(d.items()) }
if verbose: print("Cleaned: " + str(r))
if 'time' in r:
if r['time']=='TBA': r['time'] = ''
if r['time']: r['partofday'] = time_to_partofday(r['time'])
r['type'] = ''
if 'loc' in r:
if r['loc'] == 'ONLINE': r['type'] = 'online'
if r['loc'] == 'ONLINE' and r['time']: r['type'] = 'online live'
if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
if r['loc']: r['site'] = room_to_site(r['loc'],verbose)
if 'code' in r:
if re.search(r'ONLINE\sLIVE',r['code']):
r['type'] = 'online live'
elif re.search(r'ONLINE',r['code']):
r['type'] = 'online'
# does it have a section? it is the last course
if r['crn']: # is a new course or a continuation?
if verbose: print(" it's a new section.")
if this_course:
if not this_course['extra']: this_course.pop('extra',None)
all_courses.append(this_course)
this_course = r
#print(r['name'])
this_course['extra'] = []
else:
# is a continuation line
if verbose: print(" additional meeting: " + str(r))
for k,v in list(r.items()):
if not v: r.pop(k,None)
# TODO: if extra line is different type?
#if this_course['type']=='online' and r['type'] != 'online': this_course['type'] = 'hybrid'
#elif this_course['type']!='online' and r['type'] == 'online': this_course['type'] = 'hybrid'
this_course['extra'].append(r)
return all_courses
##
## SCHEDULE PARSE HELPERS
##
##
def time_to_partofday(t):
#todo: account for multiple sites/rows
# 11:20 am-12:10 pm
mor = strptime('12:00 PM', '%I:%M %p')
mid = strptime( '2:00 PM', '%I:%M %p')
aft = strptime( '6:00 PM', '%I:%M %p')
if t == 'TBA':
return 'TBA'
t = t.upper()
parts = t.split('-')
try:
begin = strptime(parts[0], '%I:%M %p')
end = strptime(parts[1], '%I:%M %p')
if end > aft:
return "Evening"
if end > mid:
return "Afternoon"
if end > mor:
return "Midday"
return "Morning"
#return begin,end
except Exception as e:
#print 'problem parsing: ', t, " ",
return ""
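# e.g. time_to_partofday('11:20 am-12:10 pm') returns 'Midday' (ends after noon
# but by 2:00 PM); 'TBA' returns 'TBA' and unparseable strings return ''.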
# Deduce a 'site' field, based on room name and known offsite locations
def room_to_site(room,verbose=0):
#todo: account for multiple sites/rows
#todo: better way to store these offsite labels
othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT,'.split(',')
# is it gilroy, mh, hol, other, online or hybrid?
site = 'Gilroy'
#if len(course[0]) > 13:
# room = course[0][13]
if room in othersites:
site = "Other"
if room == 'TBA':
site = 'TBA'
if room == 'AV':
site = 'San Martin Airport'
if re.search('MHG',room):
site = 'Morgan Hill'
if re.search('HOL',room):
site = 'Hollister'
if re.search('COY',room):
site = 'Coyote Valley'
if re.search('OFFSTE',room):
site = 'Other'
if re.search('ONLINE',room):
site = 'Online'
if verbose: print(room, '\t', end=' ')
return site
def row_has_data(r): # helper
if r.find_all('th'):
return False
if len(r.find_all('td')) > 2:
return True
if re.search('Note\:', r.get_text()):
return True
return False
def row_text(r): # helper
#global dbg
d("Row Txt Fxn gets: ")
arr = []
for t in r.find_all('td'):
if t.contents and len(t.contents) and t.contents[0].name == 'img':
arr.append("1")
d("img")
r_text = t.get_text()
arr.append(r_text)
if 'colspan' in t.attrs and t['colspan']=='2':
d('[colspan2]')
arr.append('')
d("\t"+r_text, end=" ")
d('')
if len(arr)==1 and re.search('Note\:',arr[0]):
note_line = clean_funny( arr[0] )
note_line = re.sub(r'\n',' ', note_line)
note_line = re.sub(r'"','', note_line)
#note_line = re.sub(r',','\,', note_line)
return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
del arr[0]
arr[1] = clean_funny(arr[1])
arr[2] = clean_funny(arr[2])
if arr[1]: arr[1] = arr[1] + " " + arr[2]
del arr[2]
arr = [ re.sub(r'&nbsp;','',a) for a in arr]
arr = [ re.sub(',','. ',a) for a in arr]
arr = [ re.sub('\(P\)','',a) for a in arr]
arr = [ a.strip() for a in arr]
#del arr[-1]
r = ','.join(arr)+'\n'
r = re.sub('\n','',r)
r = re.sub('add to worksheet','',r)
d("Row Txt Fxn returns: " + r + "\n\n")
return r + '\n'
def clean_funny(str):
if str and str == '\xa0': return ''  # compare to the non-breaking space directly; the old bytes-vs-str comparison was always False in Python 3
return str
def clean_funny2(str):
if str and str == '\xa0': return ''
if str and str == ' ': return ''
return str
def clean_funny3(str):
return re.sub('\xa0','',str)
def scrape_schedule(short_sem, semester_label):
# Set up Chrome options
chrome_options = Options()
#chrome_options.add_argument("--headless") # Run headless
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
# Start WebDriver
driver = webdriver.Chrome(options=chrome_options)
URL = "https://ssb-prod.ec.gavilan.edu/PROD/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"
GOO = "G00102586"
GOO_PIN = "987654bb"
filename = f"{short_sem}_sched.json"
filename_html = f"{short_sem}_sched.html"
try:
# Open page
driver.get(URL)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.ID,"UserID").clear()
driver.find_element(By.ID,"UserID").send_keys(GOO)
driver.find_element(By.NAME,"PIN").send_keys(GOO_PIN)
driver.find_element(By.NAME,"loginform").submit()
print('login')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Student").click()
print('students')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Registration").click()
print('registration')
driver.implicitly_wait(5)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.LINK_TEXT,"Search for Classes").click()
print('search for classes')
driver.implicitly_wait(15)
writepage(driver.page_source)
print(driver.title)
dd = Select(driver.find_element(By.NAME,"p_term"))
if (dd):
dd.select_by_visible_text(semester_label)
driver.find_element(By.XPATH,"/html/body/div/div[4]/form").submit()
print('semester')
driver.implicitly_wait(15)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.XPATH,"/html/body/div/div[4]/form/input[18]").click()
print('advanced?')
driver.implicitly_wait(10)
writepage(driver.page_source)
print(driver.title)
driver.find_element(By.NAME,"SUB_BTN").click()
print('login')
driver.implicitly_wait(40)
time.sleep(15)
driver.implicitly_wait(40)
writepage(driver.page_source)
print(driver.title)
text = driver.page_source
codecs.open('cache/' + filename_html,'w', 'utf-8').write(text)
##
## Start parsing html
##
as_list = ssb_to_csv(text)
print(as_list)
as_dict = to_section_list(as_list)
jj = json.dumps(as_dict,indent=2)
##
## Diff from previous semester
##
try:
ps = codecs.open('cache/'+filename,'r','utf-8')
prev_sched = json.loads(ps.read())
ps.close()
if 1: # sometimes I want to re-run this without affecting the logs.
log_section_filling(as_dict, short_sem)
log_section_filling2(as_dict, short_sem)
dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
pretty_json = json.dumps( json.loads( dd.to_json() ), indent=2 )
codecs.open('cache/%s_sched_diff.json' % short_sem,'w','utf-8').write( pretty_json ) # dd.to_json() )
# Next, rename the prev sched_xxYY.json data file to have its date,
# make this new one, and then upload it to the website.
# Maybe even count the entries and do a little sanity checking
#
# print("Last modified: %s" % time.ctime(os.path.getmtime("test.txt")))
# print("Created: %s" % time.ctime(os.path.getctime("test.txt")))
last_mod = time.ctime(os.path.getmtime('cache/' + filename))
prev_stat = pathlib.Path('cache/' + filename).stat()
mtime = dt.fromtimestamp(prev_stat.st_mtime)
print(mtime)
except Exception as e:
print("Couldn't Diff.")
print("Got an exception: ", e)
# fname = pathlib.Path('test.py')
# assert fname.exists(), f'No such file: {fname}' # check that the file exists
# print(fname.stat())
#
# os.stat_result(st_mode=33206, st_ino=5066549581564298, st_dev=573948050, st_nlink=1, st_uid=0, st_gid=0, st_size=413,
# st_atime=1523480272, st_mtime=1539787740, st_ctime=1523480272)
codecs.open(f'cache/{filename}', 'w', 'utf-8').write(jj)
return as_dict
except Exception as e:
print("Got an exception: ", e)
#print("There was an error: " + e.args[0] + ". The line where the code failed was " + str(traceback.extract_stack()))
finally:
driver.quit()
def expanded(as_dict, short_sem):
#as_dict = scrape_schedule()
course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = schedules.campus_dept_hierarchy()
expanded = list_latestarts(short_sem)
fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")
ffcsv = codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8')
with ffcsv as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(fields)
for S in expanded:
parts = S['code'].split(' ')
S['dept'] = parts[0]
S['num'] = parts[1]
S['gp'] = course_to_gp[parts[0]]
S['dean'] = course_to_dean[parts[0]]
S['sem'] = short_sem
# S['act'] = S['cap']
if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
csvwriter.writerow( [ S[x] for x in fields ] )
#put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term):
show_summary = 1
the_year = '20' + term[2:4]
print("year: ", the_year, " semester: ", term)
#term_in = "cache/%s_sched.json" % term
term_out = "cache/%s_latestarts.txt" % term
expanded_out = "%s_sched_expanded.json" % term
print("Writing output to " + term_out)
#infile = codecs.open(term_in, "r", "utf-8")
outfile = codecs.open(term_out, "w", "utf-8")
exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
expanded = []
#sched = json.loads(infile.read())
#sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched.json").json()
sched = json.loads( codecs.open(f"cache/{term}_sched.json","r","utf-8").read() )
by_date = {}
if show_summary: print("course \t loc \t type \t time")
for C in sched:
if (not C['type']) and C['loc'] != 'ONLINE': # and C['time']:
C['type'] = 'in-person'
if show_summary: print("%s \t %s \t %s \t %s" % (C['code'],C['loc'],C['type'],C['time']))
if 'extra' in C:
if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
C['type'] = 'hybrid'
times = C['time'].split("-")
if len(times) > 1:
time_start = times[0]
time_end = times[1]
try:
startt = time.strptime(time_start,"%I:%M %p")
endt = time.strptime(time_end,"%I:%M %p")
min_start = startt.tm_min
min_end = endt.tm_min
if min_start == 0: min_start = "00"
else: min_start = str(min_start)
if min_end == 0: min_end = "00"
else: min_end = str(min_end)
C['time_start'] = "%i:%s" % (startt.tm_hour, min_start )
C['time_end'] = "%i:%s" % (endt.tm_hour, min_end )
if 0:
print("+ Parsed %s into %s and %s." % (C['time'], C['time_start'], C['time_end']))
except Exception as e:
print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
else:
C['time_start'] = ''
C['time_end'] = ''
if re.search('TBA',C['date']):
C['start'] = ''
C['end'] = ''
C['doy'] = ''
expanded.append(C)
continue
parts = C['date'].split("-")
start = parts[0] + "/" + the_year
end = parts[1] + "/" + the_year
try:
startd = parser.parse(start)
endd = parser.parse(end)
C['start'] = "%i-%i" % (startd.month,startd.day)
C['end'] = "%i-%i" % (endd.month,endd.day)
C['doy'] = startd.timetuple().tm_yday
expanded.append(C)
except Exception as e:
print(e, "\n-- problem parsing ", start, " or ", end)
if not startd in by_date:
by_date[startd] = []
by_date[startd].append(C)
exoutfile.write( json.dumps(expanded,indent=2) )
exoutfile.close()
#put_file('/home/public/schedule/', 'cache/', expanded_out, 0)
for X in sorted(by_date.keys()):
#print("Start: ", X)
if len(by_date[X]) < 200:
prettydate = X.strftime("%A, %B %d")
#print(prettydate + ": " + str(len(by_date[X])) + " courses")
outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
for Y in by_date[X]:
#print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
#print(Y)
#outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
outfile.close()
#put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
return expanded
# Get semesters to scrape
with open('cache/to_scrape.json', 'r') as f:
semesters = json.load(f)
# Loop through each item and call the function
for item in semesters:
as_dict = scrape_schedule(item['short_sem'], item['sem'])
ex = expanded(as_dict, item['short_sem'])
print(f"Done with {item['sem']}. Sleeping 45 seconds.")
time.sleep(45)

@ -675,7 +675,7 @@ def all_course_names():
mr.write(json.dumps(master_record,indent=2))
from semesters import semester_list, canvas_label
from semesters import sems_by_human_name, canvas_label
from semesters import code as semester_order
from localcache import all_students_history
from datetime import datetime, timedelta
@ -683,12 +683,12 @@ from datetime import datetime, timedelta
def semester_dates():
#print()
for c in canvas_label:
print(semester_list[c])
print(sems_by_human_name[c])
length = 15
if semester_list[c]['code'][0:2] == 'su':
if sems_by_human_name[c]['code'][0:2] == 'su':
length = 5
start_date = semester_list[c]['start']
start_date = sems_by_human_name[c]['start']
# Convert the date string to a datetime object
date_object = datetime.strptime(start_date, '%m/%d/%y')
start_fmt = date_object.strftime('%a %b %d, %Y')
@ -728,17 +728,17 @@ def course_line_process(line):
current_student_block.append(current_student_info)
normalized_blocks.append(current_student_block)
current_student_block = []
current_student_info = {'first':semester_list[sem]['code'], 'last':''}
current_student_info = {'first':sems_by_human_name[sem]['code'], 'last':''}
current_student = uid
#print(f"Student: {uid} ({line['user_name']})")
# line is a dict
current_student_info['last'] = semester_list[sem]['code']
current_student_info['last'] = sems_by_human_name[sem]['code']
year, season = m1.group(1), m1.group(2)
date_format = "%Y-%m-%d %H:%M:%S.%f"
create_dt = datetime.strptime(line['created'], date_format)
update_dt = datetime.strptime(line['updated'], date_format)
sem_start = datetime.strptime(semester_list[sem]['start'], '%m/%d/%y')
sem_start = datetime.strptime(sems_by_human_name[sem]['start'], '%m/%d/%y')
course = line['course_name']
c_parts = course.split(' ')
@ -763,7 +763,7 @@ def course_line_process(line):
sign = '+'
#print(f" {mark} {classname} added T{sign}{add_day} {semester_list[sem]['code']}")
temp_usr_name = re.sub(r',','',line['user_name'])
current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},add,T{sign}{add_day},{sems_by_human_name[sem]['code']}")
if flow == "deleted":
# deleted, give delete date
del_day = sem_start - update_dt
@ -773,7 +773,7 @@ def course_line_process(line):
del_day = -del_day
sign = '+'
#print(f" {mark} {classname} deleted T{sign}{del_day} {semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{semester_list[sem]['code']}")
current_student_block.append(f"{uid},{temp_usr_name},{classname},del,T{sign}{del_day},{sems_by_human_name[sem]['code']}")
def normalize_course_histories():

@ -1,6 +1,6 @@
import json, codecs, requests, re, pdb, csv, textdistance, collections
import sys, csv, string, funcy, math, shutil, imghdr, os
import sys, csv, string, funcy, math, shutil, os
import pytz, time
import pandas as pd
import matplotlib.pyplot as plt
@ -8,12 +8,14 @@ import matplotlib.pyplot as plt
#from pandas import TimeGrouper
from PIL import Image
from collections import defaultdict
from pipelines import fetch, fetch_stream, getSemesterSchedule, header, url, FetchError, put_file
from pipelines import fetch, fetch_stream, header, url, FetchError, put_file
from schedules import get_semester_schedule
from courses import course_enrollment, users_in_semester
from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock
from localcache import teachers_courses_semester, course_mode, sem_schedule
from localcache2 import all_2x_sem_courses_teachers, all_sem_courses_teachers
from pipelines import dean, dean_names
from schedules import campus_dept_hierarchy
#from pipelines import dean, dean_names #TODO
from util import dept_from_name, most_common_item
from os.path import exists, getmtime
@ -234,7 +236,7 @@ def staff_dir(get_fresh=False):
#
def schedForTeacherOverview(long,short):
sem = getSemesterSchedule(short)
sem = get_semester_schedule(short)
sem['type'] = sem['type'].apply(classType)
#sem['code'] = sem[['code','type']].apply(' '.join,axis=1)
sem['sem'] = short
@ -1099,6 +1101,7 @@ def checkForAvatar(id=2):
# Grab em. Change the first if when continuing after problems....
def downloadPhoto():
import imghdr
pix_dir = 'cache/picsCanvas2022/'
# Update the list of all ilearn users?
i_last_ix = '-1'
@ -2203,17 +2206,28 @@ def cross_ref_training():
wb = load_workbook("C:/Users/phowell/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx")
print(wb.sheetnames)
# Fetch from Canvas DB. Make sure it's recently updated.
# Also relies on schedule being in database. Run localcache2.courses_to_sched()
courses = all_2x_sem_courses_teachers('202550', '202570') #
#courses = all_sem_courses_teachers('202470')
# report for email
report = codecs.open('cache/gott_report.txt','w','utf-8')
# update local list of teachers from ilearn?
RELOAD_TEACHERS = 0
ask = input('download new list of teachers? (y/n) ')
if ask.strip()=='y': RELOAD_TEACHERS = 1
if RELOAD_TEACHERS:
teacherRolesUpdateCache()
# TODO inefficient but just read it again
all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = campus_dept_hierarchy()
records = {}
sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed', 'SU21 Workshop', 'BOOT CAMP', 'GOTT ABC', 'TITLE V GOTT ACADEMY', 'Other Certifications']
for sname in sheets:
@ -2221,7 +2235,7 @@ def cross_ref_training():
sheet = wb[sname]
records[sname] = {}
for row in sheet.iter_rows():
if row[0].value == 'G Number': continue
if row[1].value == 'ID': continue
goo = row[1].value
rowvals = [str(v.value) for v in row]
records[sname][goo] = rowvals
@ -2233,17 +2247,39 @@ def cross_ref_training():
teachers_bydept = defaultdict(set)
alldepts = set()
# reconfigure to key on goo
by_goo = defaultdict(dict)
# courses = all_2x_sem_courses_teachers('202450', '202470') #
courses = all_sem_courses_teachers('202470')
for course,coursedict in records.items():
print(course)
for goo,record in coursedict.items():
if goo=='ID': continue
if record[0]=='Name': continue
print(f" {goo}")
try:
if record[4]=="None":
by_goo[goo][course] = "ok"
else:
by_goo[goo][course] = record[4] # record the end date
except:
print(f" -- problem with this record {json.dumps(record)}")
by_goo[goo][course] = 'ok'
bg_file = codecs.open('cache/gott_by_goo.json','w','utf-8')
bg_file.write(json.dumps(by_goo,indent=2))
##
## Start looking at the courses to cross reference
##
for c in courses:
print(c)
try:
goo = c[6]
crn = c[8]
goo = c[8]
crn = c[4]
name = c[1] # full course name
teacher = c[4] # last, first
ctype = c[7]
teacher = c[6] # last, first
ctype = c[3]
dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1)
alldepts.add(dept1)
d = list(c)
@ -2267,16 +2303,16 @@ def cross_ref_training():
flagfont = PatternFill("solid", fgColor="00FFFFCC")
for thedean in ['et','nl','ss','jn', 'de']:
sheet.cell(row=r, column=1).value = dean_names[thedean]
sheet.cell(row=r, column=1).value = dean_code_to_name[thedean]
sheet.cell(row=r, column=1).font = deptfont
r += 2
report.write(f"Dean: {dean_names[thedean]}\n")
report.write(f"Dean: {dean_code_to_name[thedean]}\n")
for D in alldepts:
if not D in dean:
if not D in course_to_dean:
print(f"MISSING DEAN for dept: {D}")
if dean[D] == thedean:
if course_to_dean[D] == thedean:
if len(teachers_bydept[D]) == 0: continue
print(f"\n------------\n{D}")
sheet.cell(row=r, column=1).value = D
@ -2289,8 +2325,8 @@ def cross_ref_training():
waived = 0
sects = teachers[t]
print(f"Sections for {t}: {sects}")
goo = sects[0][6]
course_mode = sects[0][7]
goo = sects[0][8]
course_mode = sects[0][3]
print(t)
sheet.cell(row=r, column=1).value = f"{t}"
sheet.cell(row=r, column=2).value = f"{goo}"
@ -2521,6 +2557,21 @@ def cross_ref_training_withcsv():
print(f" {s[8]} {s[2]}")
print()
def get_portfolios(id=0):
if not id:
id = int( input( "what user id? "))
p = fetch( f"{url}/api/v1/users/{id}/eportfolios" )
print(json.dumps(p, indent=2))
def get_port_pages(id=0):
if not id:
id = int( input("what portfolio id? "))
p = fetch(f"{url}/api/v1/eportfolios/{id}/pages")
print(json.dumps(p, indent=2))
@ -2552,6 +2603,10 @@ if __name__ == "__main__":
25: ['cross ref training', cross_ref_training],
26: ['find goo numbers in training spreadsheet', training_find_goos],
30: ['get portfolios for user id', get_portfolios],
31: ['get portfolio pages for portfolio id', get_port_pages],
#3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
#5: ['Match names in schedule & ilearn', match_usernames],
#6: ['Create Dept\'s ZTC list', create_ztc_list],

49
util.py
@ -13,6 +13,43 @@ import functools
from functools import reduce
# Teacher name format changed. Remove commas and switch first to last
def fix_t_name(str):
str = str.strip()
str = re.sub('\s+',' ',str)
parts = str.split(', ')
if len(parts)>1:
return parts[1].strip() + " " + parts[0].strip()
return str
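# e.g. fix_t_name(' Garcia,  Maria ') -> 'Maria Garcia'; a name with no comma is
# returned unchanged apart from whitespace cleanup.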
# Separate dept and code
def split_class_dept(c):
return c.split(' ')[0]
def split_class_code(c):
num = c.split(' ')[1]
parts = re.match('(\d+)([a-zA-Z]+)',num)
#ret = "Got %s, " % c
if parts:
r = int(parts.group(1))
#print(ret + "returning %i." % r)
return r
#print(ret + "returning %s." % num)
return int(num)
def split_class_code_letter(c):
num = c.split(' ')[1]
parts = re.match('(\d+)([A-Za-z]+)',num)
if parts:
return parts.group(2)
return ''
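# e.g. for c = 'MATH 233A': split_class_dept(c) -> 'MATH', split_class_code(c) -> 233,
# split_class_code_letter(c) -> 'A'; a plain 'MATH 233' gives 233 and '' respectively.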
def nowAsStr():
#Get the current time, printed in the right format
currentTime = datetime.datetime.utcnow()
prettyTime = currentTime.strftime('%a, %d %b %Y %H:%M:%S GMT')
return prettyTime
def contains_key_value(lst, x, y):
"""
Checks if a list contains a dictionary with a specific key-value pair.
@ -236,3 +273,15 @@ def partition(times_list):
dd.write(json.dumps(timeline_times))
return sessions
def clean_fn(s):
s = re.sub(r'[\s:]+','',s)
s = re.sub(r'\/','+',s)
return s
def format_html(html):
soup = bs(html, 'html.parser')
return soup.prettify()

182
video.py Normal file
@ -0,0 +1,182 @@
# Tools for detecting video embeds, swapping SRT subtitle files, etc
import codecs, re, requests, json, os, webbrowser
from bs4 import BeautifulSoup as bs
from util import minimal_string, stripper, mycleaner
from content import grab_course_pages
from pipelines import put_file
# Use template to build html page with homegrown subtitles
def build_srt_embed_php(data):
template = codecs.open('template_srt_and_video.txt','r','utf-8').readlines()
result = ''
for L in template:
L = re.sub('FRAMEID',data['frameid'],L)
L = re.sub('TITLE',data['title'],L)
L = re.sub('EMBEDLINK',data['embedlink'],L)
L = re.sub('SRTFOLDERFILE',data['srtfolderfile'],L)
result += L
return result
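# Hypothetical example of the dict this expects -- the keys match the placeholders
# substituted above; the values are made up:
# data = {'frameid': 'vid1',
#         'title': 'Week 1 Lecture',
#         'embedlink': 'https://www.youtube.com/embed/XXXXXXXXXXX',
#         'srtfolderfile': 'bio101/week1.srt'}
# html = build_srt_embed_php(data)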
def yt_title(code):
# Look up a YouTube video title by its code; results are cached in saved_titles
# and persisted to saved_youtube_titles.json so repeat lookups skip the request.
global saved_titles
if code in saved_titles:
return saved_titles[code]
resp = requests.get('https://www.youtube.com/watch?v=%s' % code)
soup = bs(resp.content, "lxml")
title = soup.find('title').text
title = re.sub(r'\s-\sYouTube', '', title)
saved_titles[code] = title
codecs.open('saved_youtube_titles.json', 'w', 'utf-8').write(json.dumps(saved_titles))
return title
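# Example (illustrative, hypothetical code): yt_title("abc123XYZ") fetches the watch page,
# strips the trailing " - YouTube" from its <title>, and caches the result for later calls.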
def swap_youtube_subtitles():
# example here: http://siloor.github.io/youtube.external.subtitle/examples/srt/
# srt folder, look at all filenames
srtlist = os.listdir('video_srt')
for i, V in enumerate(srtlist):
print(str(i) + '. ' + V)
choice = input("Which SRT folder? ")
choice = srtlist[int(choice)]
srt_folder = 'video_srt/'+choice
class_srt_folder = choice
srt_files = os.listdir(srt_folder)
srt_shorts = {}
print("\nThese are the subtitle files: " + str(srt_files))
for V in srt_files:
if V.endswith('srt'):
V1 = re.sub(r'(\.\w+$)','',V)
srt_shorts[V] = minimal_string(V1)
crs_id = input("What is the id of the course? ")
grab_course_pages(crs_id)
v1_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
v1_content = v1_pages.read()
# a temporary page of all youtube links
tp = codecs.open('page_revisions/links_' + str(crs_id) + '.html', 'w','utf-8')
# course pages, get them all and look for youtube embeds
title_shorts = {}
title_embedlink = {}
title_list = []
print("I'm looking for iframes and youtube links.")
for L in v1_content.split('\n'):
if re.search(r'<a.*?href="https://youtu', L):
print("Possibly there's a linked video instead of an embedded one: " + L)
if re.search('iframe',L):
ma = re.compile(r'(\w+)=(".*?")')
#print "\n"
this_title = ''
this_src = ''
for g in ma.findall(L):
print(g)
if g[0]=='title':
this_title = g[1].replace('"','')
if g[0]=='src':
this_src = g[1].replace('"','')
#print g
if not this_title:
tmp = re.search(r'embed\/(.*?)\?',this_src)
if not tmp: tmp = re.search(r'embed\/(.*?)$',this_src)
if tmp:
this_title = yt_title(tmp.groups()[0])
title_shorts[this_title] = minimal_string(this_title)
title_list.append(this_title)
title_embedlink[this_title] = this_src
print("%s\n" % this_title.encode('ascii','ignore'))
tp.write( "%s<br><a target='_blank' href='%s'>%s</a><br /><br />" % (this_title, this_src, this_src) )
# match them:
# lowercase, runs of non-alphanumeric chars become a single space, then try to match
# if any srts remain unmatched, ask the user
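# e.g. (illustrative, assuming minimal_string lowercases and collapses non-alphanumeric runs
# to a single space) the title "Lecture 3: Loops" and the file "Lecture_3-Loops.srt" both
# reduce to "lecture 3 loops", so they match automatically.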
tp.close()
webbrowser.open_new_tab('file://C:/SCRIPTS/everything-json/page_revisions/links_'+str(crs_id)+'.html')
matches = {} # key is Title, value is srt file
for S,v in list(srt_shorts.items()):
found_match = 0
print(v, end=' ')
for T, Tv in list(title_shorts.items()):
if v == Tv:
print(' \tMatches: ' + T, end=' ')
found_match = 1
matches[T] = S
break
#print "\n"
print("\nThese are the srt files: ")
print(json.dumps(srt_shorts,indent=2))
print("\nThese are the titles: ")
print(json.dumps(title_shorts,indent=2))
print("\nThese are the matches: ")
print(json.dumps(matches,indent=2))
print(("There are %d SRT files and %d VIDEOS found. " % ( len(list(srt_shorts.keys())), len(list(title_shorts.keys())) ) ))
for S,v in list(srt_shorts.items()):
if S not in matches.values():
print("\nDidn't find a match for: " + S)
i = 0
for T in title_list:
if T not in matches: print(str(i+1) + ". " + T.encode('ascii', 'ignore').decode('ascii'))
i += 1
print("Here's the first few lines of the SRT:")
print(( re.sub(r'\s+',' ', '\n'.join(open(srt_folder+"/"+S,'r').readlines()[0:10]))+"\n\n"))
choice = input("Which one should I match it to? (zero for no match) ")
if int(choice)>0:
matches[ title_list[ int(choice)-1 ] ] = S
print("SRT clean name was: %s, and TITLE clean name was: %s" % (v,title_shorts[title_list[ int(choice)-1 ]] ))
print("ok, here are the matches:")
print(json.dumps(matches,indent=2))
# construct subsidiary pages, upload them
i = 0
for m,v in list(matches.items()):
# open template
# do replacement
i += 1
data = {'frameid':'videoframe'+str(i), 'title':m, 'embedlink':title_embedlink[m], 'srtfolderfile':v }
print(json.dumps(data,indent=2))
file_part = v.split('.')[0]
new_php = codecs.open(srt_folder + '/' + file_part + '.php','w','utf-8')
new_php.write(build_srt_embed_php(data))
new_php.close()
#srt_files = os.listdir(srt_folder)
put_file(class_srt_folder)
def test_swap():
crs_id = '6923'
# swap in embed code and re-upload canvas pages
v2_pages = codecs.open('page_revisions/course_'+str(crs_id)+'.html','r','utf-8')
v2_content = v2_pages.read()
ma = re.compile(r'(\w+)=(".*?")')
for L in v2_content.split('\n'):
find = re.findall(r'<iframe(.*?)>', L)
if find:
print("Found: ", find)
for each in find:
#print "\n"
this_title = ''
this_src = ''
for g in ma.findall(each):
#print g
if g[0]=='title':
this_title = g[1].replace('"','')
if g[0]=='src':
this_src = g[1].replace('"','')
#print g
if not this_title:
tmp = re.search(r'embed\/(.*?)\?',this_src)
if not tmp: tmp = re.search(r'embed\/(.*?)$',this_src)
if tmp:
this_title = yt_title(tmp.groups()[0])
print("Found embed link: %s\n and title: %s\n" % (this_src,this_title.encode('ascii','ignore')))