import util

import codecs, json, requests, re, csv, datetime, os, jsondiff, os.path
import sys, shutil, hmac, hashlib, base64, schedule, time, pathlib
from datetime import timedelta

from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, url, domain, account_id, header, header_media
from canvas_secrets import instructure_url, instructure_username, instructure_private_key

import asyncio
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator


"""
Everything to do with fetching data:

- from iLearn, via API token
- current roster uploads from Instructure's SFTP site
- raw logs and other files from the Canvas Data repo
- from SSB, using Firefox to scrape the schedule

And some subsequent processing:

- raw roster files, into a more compact JSON format
- raw logs, into something more useful
"""

verbose = False

users = {}
users_by_id = {}

# todo: all these constants for SSB -- line 1008
#
# todo: https://stackoverflow.com/questions/42656247/how-can-i-use-canvas-data-rest-api-using-python


sys.setrecursionlimit(100000)

local_data_folder = 'cache/canvas_data/'
mylog = codecs.open(local_data_folder + 'temp_log.txt', 'w')


class FetchError(Exception):
    pass


DEBUG = 0

def d(s, end=''):
    global DEBUG
    if end and DEBUG: print(s, end=end)
    elif DEBUG: print(s)


################
################ CANVAS API MAIN FETCHING FUNCTIONS
################
################
################


# Main Canvas querying fxn
def fetch(target, verbose=0, params=0, media=0):
    # If there are more results, recursively call myself, adding on to the results.
    results = 0
    count = 0
    if target[0:4] != "http": target = url + target
    if verbose:
        print("++ Fetching: " + target)
    if media:
        r2 = requests.get(target, headers=header_media)
    elif params:
        r2 = requests.get(target, headers=header, params=params)
    else:
        r2 = requests.get(target, headers=header)
    #if verbose:
    #    print("++ Got: " + r2.text)
    try:
        results = json.loads(r2.text)
        count = len(results)
    except:
        print("-- Failed to parse: ", r2.text)
    if verbose:
        print("Got %i results" % count)
    if verbose > 1:
        print(r2.headers)

    tempout = codecs.open('cache/fetchcache.txt', 'a', 'utf-8')
    tempout.write(r2.text + "\n\n")
    tempout.close()

    if 'link' in r2.headers and count > 0:
        links = r2.headers['link'].split(',')
        for L in links:
            ll = L.split(';')
            link = ll[0].replace("<", "")
            link = link.replace(">", "")
            if re.search(r'next', ll[1]):
                if verbose: print("++ More link: " + link)
                #link = re.sub(r'per_page=10$', 'per_page=100', link)  # link.replace('per_page=10','per_page=500')
                #if verbose: print("++ More link: " + link)

                nest = fetch(link, verbose, params, media)
                if isinstance(results, dict): results.update(nest)
                else: results.extend(nest)
    return results

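
# Illustrative usage sketch (not called anywhere): fetch() follows Canvas's
# Link-header pagination, so one call returns the whole collection. The endpoint
# mirrors the commented-out test at the bottom of this file.
def _example_fetch_usage():
    course_users = fetch('/api/v1/courses/69/users?per_page=100', verbose=1)
    for u in course_users:
        print(u.get('id'), u.get('name'))
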

# Main Canvas querying fxn - stream version - don't die on big requests
def fetch_stream(target, verbose=0):
    # While there are more results, keep looping and yield one page at a time.
    results = 0
    while target:
        if target[0:4] != "http": target = url + target
        if verbose:
            print("++ Fetching: " + target)
        r2 = requests.get(target, headers=header)
        if r2.status_code == 502:
            raise FetchError()
        count = 0
        try:
            results = json.loads(r2.text)
            count = len(results)
        except:
            print("-- Failed to parse: ", r2.text)
        if verbose:
            print("Got %i results" % count)
        if verbose > 1:
            print(r2.headers)
        tempout = codecs.open('cache/fetchcache.txt', 'a', 'utf-8')
        tempout.write(r2.text + "\n\n")
        tempout.close()

        next_link_found = 0
        if 'link' in r2.headers and count > 0:
            links = r2.headers['link'].split(',')
            for L in links:
                ll = L.split(';')
                link = ll[0].replace("<", "")
                link = link.replace(">", "")
                if re.search(r'next', ll[1]):
                    target = link
                    next_link_found = 1
                    break
        if not next_link_found: target = 0
        yield results
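

# Illustrative usage sketch (not called anywhere): fetch_stream() yields one page
# per request, so large collections can be processed incrementally. The
# account-level endpoint here is a hypothetical example.
def _example_fetch_stream_usage():
    total = 0
    for page in fetch_stream('/api/v1/accounts/%s/courses?per_page=100' % account_id, verbose=1):
        total += len(page)
    print("saw %i courses" % total)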


# For dicts with one key, collapse that one key out, because
# paging makes problems... example: enrollment_terms
def fetch_collapse(target, collapse='', verbose=0):
    # If there are more results, recursively call myself, adding on to the results.
    results = 0
    if target[0:4] != "http": target = url + target
    if verbose:
        print("++ Fetching: " + target)
    r2 = requests.get(target, headers=header)
    #if verbose:
    #    print("++ Got: " + r2.text)
    try:
        results = json.loads(r2.text)
    except:
        print("-- Failed to parse: ", r2.text)
    if verbose: print(r2.headers)

    if collapse and collapse in results:
        results = results[collapse]

    if 'link' in r2.headers:
        links = r2.headers['link'].split(',')
        for L in links:
            ll = L.split(';')
            link = ll[0].replace("<", "")
            link = link.replace(">", "")
            if re.search(r'next', ll[1]):
                if verbose: print("++ More link: " + link)
                nest = fetch_collapse(link, collapse, verbose)
                if isinstance(results, dict): results.update(nest)
                else: results.extend(nest)
    return results
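

# Illustrative usage sketch (not called anywhere): term listings come back wrapped
# in a single 'enrollment_terms' key, which is what the collapse parameter strips.
def _example_fetch_collapse_usage():
    terms = fetch_collapse('/api/v1/accounts/%s/terms' % account_id, collapse='enrollment_terms', verbose=1)
    for t in terms:
        print(t.get('id'), t.get('name'))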


################
################ CANVAS DATA
################
################
################


# Get canvas data, 2024 style (Canvas Data 2 / DAP client)
def canvas_data_2024_run():
    print("Updating all tables.")
    asyncio.run(canvas_data_2024())
    print("Done with all tables.")


async def canvas_data_2024():
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    #connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"

    # todo: use secrets
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.199/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,pseudonyms,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" trying to update {table}")
                try:
                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")


# Set up canvas data, 2024 style: create the local tables before the first sync
def setup_canvas_data_2024_run():
    print("Setting up all tables.")
    asyncio.run(setup_canvas_data_2024())
    print("Done with all tables.")


async def setup_canvas_data_2024():
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    #connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"
    # todo: use secrets
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.192/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,pseudonyms,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" {table}")
                try:
                    await SQLReplicator(session, db_connection).initialize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")
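

# Sketch of the environment the two DAP routines above expect. The variable names
# come from the code; the values below are placeholders, not real credentials, and
# the gateway URL is only an assumed example.
#
#   export DAP_API_URL="https://api-gateway.instructure.com"
#   export DAP_CLIENT_ID="us-west-2#xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
#   export DAP_CLIENT_SECRET="..."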


################
################ ROSTERS AND REGISTRATION
################
################
################

# todo: the pipeline is disorganized. Organize it to have
# a hope of taking all this to a higher level.
#

# todo: where does this belong in the pipeline? compare with recent_schedules()


# Take the generically named roster upload files, move them to a semester folder,
# and give them a date.
def move_to_folder(sem, year, folder, files):
    semester = year + sem
    semester_path = 'cache/rosters/%s' % semester
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    print("+ Moving roster files to folder: %s" % semester_path)
    if not os.path.isdir(semester_path):
        print("+ Creating folder: %s" % semester_path)
        os.makedirs(semester_path)
    def safe_move(src, dst):
        try:
            os.replace(src, dst)
        except Exception as ex:
            print(f"! move failed {src} -> {dst}: {ex}")
    if 'courses.csv' in files:
        safe_move('cache/rosters/courses-%s.csv' % folder, 'cache/rosters/%s/courses.%s.csv' % (semester, now))
    if 'enrollments.csv' in files:
        safe_move('cache/rosters/enrollments-%s.csv' % folder, 'cache/rosters/%s/enrollments.%s.csv' % (semester, now))
    if 'users.csv' in files:
        safe_move('cache/rosters/users-%s.csv' % folder, 'cache/rosters/%s/users.%s.csv' % (semester, now))
    if 'login.csv' in files:
        safe_move('cache/rosters/login-%s.csv' % folder, 'cache/rosters/%s/login.%s.csv' % (semester, now))
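

# For reference, after a successful move the semester folder looks roughly like
# this (timestamps illustrative):
#
#   cache/rosters/2025fall/courses.2025-08-30T14-02.csv
#   cache/rosters/2025fall/enrollments.2025-08-30T14-02.csv
#   cache/rosters/2025fall/users.2025-08-30T14-02.csv
#   cache/rosters/2025fall/login.2025-08-30T14-02.csv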


# Build lookup maps from the latest users/courses snapshots for nicer keys/names.
def _latest_snapshot_map(sem_path, prefix):
    """Return path to latest `{prefix}.*.csv` file in `sem_path` or None if missing."""
    try:
        files = [f for f in os.listdir(sem_path) if f.startswith(prefix + '.') and f.endswith('.csv')]
    except FileNotFoundError:
        return None
    def ts_of(name):
        try:
            label = name[len(prefix)+1:-4]
            return datetime.datetime.strptime(label, '%Y-%m-%dT%H-%M')
        except Exception:
            return datetime.datetime.min
    files.sort(key=ts_of)
    return os.path.join(sem_path, files[-1]) if files else None


def _read_csv_dicts(path):
    """Read a CSV file into a list of normalized dict rows; returns [] if file missing.
    - Normalizes header keys to lowercase and strips whitespace.
    - Strips whitespace from string values.
    """
    rows = []
    if not path or not os.path.exists(path):
        return rows
    with open(path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f)
        for r in reader:
            norm = {}
            for k, v in (r.items() if r else []):
                nk = (k.strip().lower() if isinstance(k, str) else k)
                if isinstance(v, str):
                    v = v.strip()
                norm[nk] = v
            if norm:
                rows.append(norm)
    return rows


def _build_user_map(users_csv_path):
    """Return dict keyed by `user_id` with selected fields from users.csv.
    Expected columns: status,user_id,login_id,last_name,first_name,email,password
    """
    user_map = {}
    for r in _read_csv_dicts(users_csv_path):
        uid = r.get('user_id')
        if not uid:
            continue
        user_map[str(uid)] = {
            'user_id': str(uid),
            'login_id': r.get('login_id',''),
            'first_name': r.get('first_name',''),
            'last_name': r.get('last_name',''),
            'email': r.get('email',''),
        }
    return user_map


def _build_course_map(courses_csv_path):
    """Return dict keyed by `course_id` from courses.csv, keeping long/short names.
    Expected columns: status,term_id,long_name,short_name,course_id,blueprint_course_id
    """
    course_map = {}
    for r in _read_csv_dicts(courses_csv_path):
        cid = r.get('course_id')
        if not cid:
            continue
        course_map[str(cid)] = {
            'course_id': str(cid),
            'term_id': r.get('term_id',''),
            'long_name': r.get('long_name',''),
            'short_name': r.get('short_name',''),
        }
    return course_map


def _parse_label(label):
    """Parse the timestamp label from filenames (e.g. 2025-01-31T14-00) into a datetime; return None on failure."""
    try:
        return datetime.datetime.strptime(label, '%Y-%m-%dT%H-%M')
    except Exception:
        return None


# Compute enrollment changes across semester snapshots and emit JSON indexes.
def compute_enrollment_changes(sem=None, year=None):
    """Walk cache/rosters/<year><sem>/enrollments.*.csv in ascending time order and detect adds/drops/changes.
    - If `sem`/`year` omitted, prompt for a semester and resolve via semesters.find_term.
    - Emits JSON indexed by course and by user for easy UI lookup.
    """
    if not sem or not year:
        try:
            import semesters
            ans = input("Which semester? (e.g., 'fa25', 'Fall 2025', '2025 Fall'): ").strip()
            rec = semesters.find_term(ans)
            if not rec or not rec.get('standard'):
                print("compute_enrollment_changes: could not parse semester input.")
                return
            std = rec['standard']  # e.g., 'Fall 2025'
            parts = std.split()
            season = parts[0].lower() if len(parts) >= 2 else ''
            year = parts[1] if len(parts) >= 2 else ''
            season_map = {'spring': 'spring', 'summer': 'summer', 'fall': 'fall', 'winter': 'winter'}
            sem = season_map.get(season, season)
        except Exception as ex:
            print(f"compute_enrollment_changes: semester prompt failed: {ex}")
            return
    semester = f"{year}{sem}"
    sem_path = os.path.join('cache', 'rosters', semester)
    if not os.path.isdir(sem_path):
        print(f"compute_enrollment_changes: missing folder {sem_path}")
        return

    # Discover all enrollment snapshots in time order
    files = [f for f in os.listdir(sem_path) if f.startswith('enrollments.') and f.endswith('.csv')]
    def snap_key(name):
        label = name[len('enrollments.'):-4]
        dt = _parse_label(label)
        return (dt or datetime.datetime.min, label)
    files.sort(key=snap_key)
    if not files:
        print(f"compute_enrollment_changes: no snapshots in {sem_path}")
        return

    # Build user/course maps from latest snapshots for enrichment
    latest_users = _latest_snapshot_map(sem_path, 'users')
    latest_courses = _latest_snapshot_map(sem_path, 'courses')
    users_map = _build_user_map(latest_users)
    courses_map = _build_course_map(latest_courses)

    # Collect remote login info across all login snapshots
    remote_info = {}
    login_files = [f for f in os.listdir(sem_path) if f.startswith('login.') and f.endswith('.csv')]
    def login_key(name):
        label = name[len('login.'):-4]
        dt = _parse_label(label)
        return (dt or datetime.datetime.min, label)
    login_files.sort(key=login_key)
    for fname in login_files:
        for r in _read_csv_dicts(os.path.join(sem_path, fname)):
            uid = r.get('user_id')
            if not uid:
                continue
            remote_info[str(uid)] = {
                'remote': True,
                'root_account': r.get('root_account','')
            }
    # merge remote flags into users_map
    for uid, info in remote_info.items():
        users_map.setdefault(uid, {'user_id': uid, 'login_id':'', 'first_name':'', 'last_name':'', 'email':''})
        users_map[uid].update(info)

    def choose_course_key(course_id):
        cid = str(course_id or 'unknown')
        detail = courses_map.get(cid, {})
        # Course key at CRN level uses the course_id directly (e.g., 202570-12345)
        key = cid
        info = {'course_id': cid, 'long_name': detail.get('long_name',''), 'short_name': detail.get('short_name',''), 'term_id': detail.get('term_id','')}
        return key, info

    def choose_user_key(user_id):
        uid = str(user_id or 'unknown')
        info = users_map.get(uid, {})
        # Prefer to show user_id (Canvas/SIS ID here), along with convenience fields
        return uid, {
            'user_id': uid,
            'login_id': info.get('login_id',''),
            'first_name': info.get('first_name',''),
            'last_name': info.get('last_name',''),
            'email': info.get('email',''),
            'remote': info.get('remote', False),
            'root_account': info.get('root_account',''),
        }

    # Accumulators
    by_course = {}
    by_user = {}

    prev = {}
    for fname in files:
        label = fname[len('enrollments.'):-4]
        snap_time = _parse_label(label)
        path = os.path.join(sem_path, fname)
        curr = {}
        # Build state for this snapshot keyed by (course,user)
        for r in _read_csv_dicts(path):
            user_id = r.get('user_id')
            course_id = r.get('course_id')
            if not user_id and not course_id:
                continue
            key = (str(course_id or ''), str(user_id))
            curr[key] = {
                'status': r.get('status') or r.get('enrollment_state') or r.get('state') or '',
                'role': r.get('role') or r.get('type') or '',
            }

        # Compare with previous snapshot (including the first, using empty prev for baseline)
        all_keys = set(prev.keys()) | set(curr.keys())
        for k in all_keys:
            before = prev.get(k)
            after = curr.get(k)
            course_id, user_id = k
            course_key, course_info = choose_course_key(course_id)
            user_key, user_info = choose_user_key(user_id)

            def emit(action, extra=None):
                evt = {
                    'time': (snap_time.isoformat(timespec='minutes') if snap_time else label),
                    'action': action,
                    'course_key': course_key,
                    'course': course_info,
                    'user_key': user_key,
                    'user': user_info,
                    'role': (after or before or {}).get('role',''),
                    'status': (after or before or {}).get('status',''),
                }
                if before:
                    evt['before'] = before
                if after:
                    evt['after'] = after
                by_course.setdefault(course_key, []).append(evt)
                by_user.setdefault(user_key, []).append(evt)

            if before and not after:
                # Row disappeared; if last known status was deleted, count as drop; otherwise record anomaly.
                if before.get('status','').lower() == 'deleted':
                    emit('drop')
                else:
                    emit('enrollment_row_removed')
            elif after and not before:
                # New row; if active, it's an add; otherwise note row added.
                if after.get('status','').lower() == 'active':
                    emit('add')
                elif after.get('status','').lower() == 'deleted':
                    emit('drop')
                else:
                    emit('enrollment_row_added')
            elif before and after:
                # detect attribute changes
                role_changed = before.get('role') != after.get('role')
                status_changed = before.get('status') != after.get('status')
                if status_changed:
                    if str(after.get('status','')).lower() == 'active':
                        emit('add')
                    elif str(after.get('status','')).lower() == 'deleted':
                        emit('drop')
                    else:
                        emit('status_change')
                if role_changed:
                    emit('role_change')

        prev = curr

    # Also detect appearance/disappearance in users.csv and courses.csv sequences
    def diff_entities(prefix, id_field, emit_fn):
        seq = [f for f in os.listdir(sem_path) if f.startswith(prefix + '.') and f.endswith('.csv')]
        def key_fn(name):
            label = name[len(prefix)+1:-4]
            dt = _parse_label(label)
            return (dt or datetime.datetime.min, label)
        seq.sort(key=key_fn)
        prev_ids = set()
        for fname in seq:
            label = fname[len(prefix)+1:-4]
            snap_time = _parse_label(label)
            curr_ids = set()
            for r in _read_csv_dicts(os.path.join(sem_path, fname)):
                vid = r.get(id_field)
                if vid:
                    curr_ids.add(str(vid))
            # added
            for vid in sorted(curr_ids - prev_ids):
                emit_fn('added', vid, snap_time, label)
            # removed
            for vid in sorted(prev_ids - curr_ids):
                emit_fn('removed', vid, snap_time, label)
            prev_ids = curr_ids

    def emit_user_presence(action, uid, snap_time, label):
        user_key, user_info = choose_user_key(uid)
        evt = {
            'time': (snap_time.isoformat(timespec='minutes') if snap_time else label),
            'action': f'user_entry_{action}',
            'user_key': user_key,
            'user': user_info,
        }
        by_user.setdefault(user_key, []).append(evt)

    def emit_course_presence(action, cid, snap_time, label):
        course_key, course_info = choose_course_key(cid)
        evt = {
            'time': (snap_time.isoformat(timespec='minutes') if snap_time else label),
            'action': f'course_entry_{action}',
            'course_key': course_key,
            'course': course_info,
        }
        by_course.setdefault(course_key, []).append(evt)

    diff_entities('users', 'user_id', emit_user_presence)
    diff_entities('courses', 'course_id', emit_course_presence)

    # Sort events by time
    def sort_key(e):
        try:
            return datetime.datetime.fromisoformat(e['time'])
        except Exception:
            return datetime.datetime.min
    for k in by_course:
        by_course[k].sort(key=sort_key)
    for k in by_user:
        by_user[k].sort(key=sort_key)

    # Write results
    out_all = {
        'semester': semester,
        'generated': datetime.datetime.now().isoformat(timespec='seconds'),
        'by_course': by_course,
        'by_user': by_user,
    }
    try:
        with open(os.path.join(sem_path, 'enrollment_changes.json'), 'w', encoding='utf-8') as f:
            f.write(json.dumps(out_all, indent=2))
        print(f"compute_enrollment_changes: wrote {sem_path}/enrollment_changes.json")
    except Exception as ex:
        print(f"compute_enrollment_changes: failed to write output: {ex}")
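

# For reference, the JSON written above has roughly this shape (values illustrative):
#
# {
#   "semester": "2025fall",
#   "generated": "2025-08-30T14:05:00",
#   "by_course": { "<course_id>": [ {"time": "...", "action": "add", "user": {...}, ...}, ... ] },
#   "by_user":   { "<user_id>":   [ {"time": "...", "action": "drop", "course": {...}, ...}, ... ] }
# }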


# Take the raw upload (csv) files and make one big json out of them.
# This relates to enrollment files, not the schedule.
def convert_roster_files(semester="", year="", folder=""):
    if not semester:
        semester = input("the semester? (ex: spring) ")
        folder = input("Folder? (ex 2020-02-25-14-58-20) ")
    uf = open('cache/rosters/users-'+folder+'.csv','r')
    cf = open('cache/rosters/courses-'+folder+'.csv','r')
    ef = open('cache/rosters/enrollments-'+folder+'.csv','r')
    u = csv.DictReader(uf)
    c = csv.DictReader(cf)
    e = csv.DictReader(ef)
    uu = [i for i in u]
    cc = [i for i in c]
    ee = [i for i in e]
    uf.close()
    cf.close()
    ef.close()
    myrosterfile = 'cache/rosters/roster_%s_%s.json' % (year, semester)

    if os.path.exists(myrosterfile):
        print(" -- Moving previous combined roster json file. opening %s ..." % myrosterfile)
        last_fileobj = open(myrosterfile,'r')
        last_file = json.load(last_fileobj)
        last_fileobj.close()

        info = last_file[3]
        last_date = info['date_filestring']

        print(' -- writing: cache/rosters/%s%s/roster_%s.json ...' % (year,semester,last_date))

        try:
            os.rename(myrosterfile, 'cache/rosters/%s%s/roster_%s.json' % (year,semester,last_date))
            print(' -- ok')
        except Exception as e:
            print(" ** Failed because I couldn't move the previous roster file: %s" % myrosterfile)
            print(e)
            myrosterfile = os.path.join(os.path.dirname(myrosterfile), 'new_' + os.path.basename(myrosterfile))
        #os.remove('cache/old_rosters/roster_'+semester+'.'+last_date+'.json')
        #os.rename(myrosterfile, 'cache/old_rosters/roster_'+semester+'.'+last_date+'.json')

    newinfo = {'date_filestring': datetime.datetime.now().strftime('%Y-%m-%dT%H-%M'), }
    try:
        new_roster = codecs.open(myrosterfile,'w', 'utf-8')
        new_roster.write( json.dumps( [uu,cc,ee,newinfo], indent=2 ))
        new_roster.close()
        print(" -- Wrote roster info to: %s." % myrosterfile)
    except Exception as e:
        print(" ** Failed to write the new roster file: %s" % myrosterfile)
        print(" ** " + str(e))


# From the Instructure SFTP site
def fetch_current_rosters(sftp=None, label_hour=None):
    """Download roster CSVs from Instructure SFTP and post-process.
    - If `sftp` is provided, reuse it (already chdir'd to SIS).
    - Files are saved using `label_hour` (format YYYY-MM-DD-HH). If None, compute a fallback label by flooring to the hour.
    """
    import pysftp
    def log(msg):
        ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        try:
            with open('cache/pipeline.log.txt','a', encoding='utf-8') as f:
                f.write(f"[{ts}] {msg}\n")
        except Exception:
            print(msg)

    close_after = False
    if sftp is None:
        cnopts = pysftp.CnOpts()
        cnopts.hostkeys = None
        sftp = pysftp.Connection(instructure_url, username=instructure_username, private_key=instructure_private_key, cnopts=cnopts)
        sftp.chdir('SIS')
        close_after = True
    try:
        now = datetime.datetime.now()
        if not label_hour:
            # default fallback: floor to hour
            label_hour = now.replace(minute=0, second=0, microsecond=0).strftime('%Y-%m-%d-%H')

        files = set(sftp.listdir())
        log(f"fetch_current_rosters: label={label_hour} files={sorted(files)}")
        need = ['login','users','courses','enrollments']
        saved = []
        for name in need:
            remote = f'{name}.csv'
            target = f'cache/rosters/{name}-{label_hour}.csv'
            try:
                if remote in files:
                    if not (os.path.exists(target) and os.path.getsize(target) > 0):
                        temp = target + '.part'
                        try:
                            if os.path.exists(temp):
                                os.remove(temp)
                        except Exception:
                            pass
                        sftp.get(remote, temp)
                        # atomic replace
                        os.replace(temp, target)
                        saved.append(remote)
                        log(f' saved {remote} -> {target}')
                    else:
                        log(f' already exists: {target}, skipping')
                else:
                    log(f' missing on server: {remote}')
            except Exception as ex:
                log(f' download failed: {remote} -> {target} err={ex}')

        if (len(saved) >= 3 and 'courses.csv' in saved) or os.path.exists(f'cache/rosters/courses-{label_hour}.csv'):
            try:
                with open(f'cache/rosters/courses-{label_hour}.csv','r') as courses:
                    courses.readline()
                    a = courses.readline()
                    parts = a.split(',')
                    year = parts[1][0:4]
                    ss = parts[1][4:6]
                    sem = {'30':'spring', '50':'summer', '70':'fall' }
                    this_sem = sem.get(ss, 'spring')
                    log(f"post-process for semester={this_sem} year={year} label={label_hour}")
                    convert_roster_files(this_sem, year, label_hour)
                    move_to_folder(this_sem, year, label_hour, saved)
                    # After moving into semester folder, compute enrollment changes timeline
                    try:
                        compute_enrollment_changes(this_sem, year)
                    except Exception as ex_changes:
                        log(f'enrollment change computation failed: {ex_changes}')
            except Exception as expp:
                log(f'post-processing failed: {expp}')
        else:
            log('not enough files to post-process yet')
    finally:
        if close_after:
            try:
                sftp.close()
            except Exception:
                pass


def fetch_current_rosters_auto(poll_seconds=15):
    """Poll the Instructure SFTP site around the top of each hour until all 4 roster files appear, then download once.
    - Robust logging to cache/pipeline.log.txt
    - Stops polling for the remainder of that hour after success
    - Tries again on the next hour
    """
    import pysftp
    from contextlib import contextmanager

    log_path = 'cache/pipeline.log.txt'

    def log(msg):
        ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        try:
            with open(log_path, 'a', encoding='utf-8') as f:
                f.write(f"[{ts}] {msg}\n")
        except Exception:
            print(f"[log-fail] {msg}")

    @contextmanager
    def sftp_conn():
        cnopts = pysftp.CnOpts()
        cnopts.hostkeys = None
        conn = None
        try:
            conn = pysftp.Connection(instructure_url, username=instructure_username,
                                     private_key=instructure_private_key, cnopts=cnopts)
            conn.chdir('SIS')
            yield conn
        finally:
            try:
                if conn:
                    conn.close()
            except Exception:
                pass

    required = {'login.csv', 'users.csv', 'courses.csv', 'enrollments.csv'}
    current_window = None   # e.g., '2025-08-30-14'
    window_done = False
    window_end = None

    def compute_window(now):
        """Return (label_hour_str, window_end_dt) if within a window, else (None, None).
        Window spans [H-5m, H+5m] around each top-of-hour H.
        If minute >= 55 -> window is next hour
        If minute <= 5  -> window is current hour
        Else -> not in window.
        """
        m = now.minute
        if m >= 55:
            base = (now.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1))
        elif m <= 5:
            base = now.replace(minute=0, second=0, microsecond=0)
        else:
            return (None, None)
        label = base.strftime('%Y-%m-%d-%H')
        end = base + timedelta(minutes=5)  # end of window
        return (label, end)
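
    # Worked example of the window logic above: at 13:57 the label is the 14:00
    # hour and the window runs until 14:05; at 14:03 the same window applies;
    # at 14:20 compute_window() returns (None, None) and the loop just sleeps.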

    log('fetch_current_rosters_auto: starting poll loop')
    while True:
        now = datetime.datetime.now()
        label, enddt = compute_window(now)
        if label is None:
            time.sleep(max(5, int(poll_seconds)))
            continue

        if label != current_window:
            current_window = label
            window_done = False
            window_end = enddt
            log(f'New window: {current_window} (until {window_end.strftime("%H:%M:%S")}). Resetting state.')

        if window_done:
            time.sleep(5)
            continue

        # Poll SFTP for files
        try:
            with sftp_conn() as sftp:
                files = set(sftp.listdir())
                missing = list(required - files)
                if missing:
                    log(f'Not ready yet. Missing: {missing}')
                else:
                    log('All required files present inside window. Starting download...')
                    try:
                        fetch_current_rosters(sftp=sftp, label_hour=current_window)
                        log('Download complete. Marking window_done for this window.')
                        window_done = True
                    except Exception as ex_dl:
                        import traceback
                        log('Download failed: ' + str(ex_dl))
                        log(traceback.format_exc())
        except Exception as ex_list:
            import traceback
            log('SFTP list failed: ' + str(ex_list))
            log(traceback.format_exc())

        # Check for window timeout
        try:
            if not window_done and window_end and now >= window_end:
                log(f'REPORT: window {current_window} ended without all files. Consider alerting/email.')
                window_done = True  # stop further checks this window
        except Exception:
            pass

        time.sleep(max(5, int(poll_seconds)))


################
################ SENDING DATA AWAY
################
################
################

# Upload a json file to www
def put_file(remotepath, localpath, localfile, prompt=1):
    import pysftp
    show_all = 0
    folder = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None

    with pysftp.Connection(FTP_SITE, username=FTP_USER, password=FTP_PW, cnopts=cnopts) as sftp:
        #todo: these paths
        #files = sftp.listdir()
        #print(folder + "\tI see these files on remote: ", files, "\n")
        sftp.chdir(remotepath)
        files = sftp.listdir()
        if show_all: print(folder + "\tI see these files on remote: ", files, "\n")
        localf = os.listdir(localpath)
        if show_all: print("I see these local: ", localf)
        if prompt:
            input('ready to upload')
        sftp.put(localpath+localfile, localfile, preserve_mtime=True)
        sftp.close()
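

# Usage sketch (illustrative only; the remote folder name is a guess, not a real
# path on the web host):
#
#   put_file('schedule/', 'cache/', 'reg_timeline_fa25.json', prompt=0)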


def process_reg_history(term='fa25'):
    from collections import defaultdict
    from itertools import groupby
    from operator import itemgetter

    def read_grouped_csv(path):
        with open(path, newline='') as f:
            fieldnames = ['datetime', 'crn', 'course', 'teacher', 'max', 'enrolled', 'waitlistmax', 'waitlisted']
            reader = csv.DictReader(f, fieldnames=fieldnames)
            rows = sorted(reader, key=lambda r: r['datetime'])  # group by timestamp
            grouped = {}
            for ts, group in groupby(rows, key=itemgetter('datetime')):
                grouped[ts] = {r['crn']: r for r in group}
            return grouped

    def crossed_threshold(old_val, new_val, max_val):
        thresholds = [0.25, 0.5, 0.75, 1.0]
        if int(max_val) == 0:
            return False, None
        old_ratio = int(old_val) / int(max_val)
        new_ratio = int(new_val) / int(max_val)
        for t in thresholds:
            if old_ratio < t <= new_ratio:
                return True, int(t * 100)
        return False, None
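
    # Worked example for crossed_threshold(): with max=40, going from 18 enrolled
    # (45%) to 21 (52.5%) crosses the 50% line and returns (True, 50); going from
    # 21 to 24 crosses nothing and returns (False, None).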

    def detect_changes(prev, curr):
        changes = defaultdict(list)

        all_crns = prev.keys() | curr.keys()
        for crn in all_crns:
            o, n = prev.get(crn), curr.get(crn)
            if not o:
                changes[crn].append((n['datetime'], "Section was added."))
            elif not n:
                changes[crn].append((
                    o['datetime'],
                    f"Section was removed (last seen: teacher {o['teacher']}, "
                    f"{o['enrolled']}/{o['max']} enrolled, {o['waitlisted']}/{o['waitlistmax']} waitlisted)."
                ))
            else:
                dt = n['datetime']
                if o['teacher'] != n['teacher']:
                    changes[crn].append((dt, f"Teacher changed from {o['teacher']} to {n['teacher']}."))
                if o['enrolled'] != n['enrolled']:
                    crossed, percent = crossed_threshold(o['enrolled'], n['enrolled'], n['max'])
                    if crossed:
                        changes[crn].append((dt, f"Enrollment crossed {percent}% ({n['enrolled']}/{n['max']})."))
                if int(n['waitlisted']) > 10 and o['waitlisted'] != n['waitlisted']:
                    changes[crn].append((dt, f"Waitlist exceeds 10: {n['waitlisted']}."))
        return changes

    def time_to_iso(s):
        return datetime.datetime.strptime(s, "%Y-%m-%dT%H-%M").isoformat()

    def detect_changes_structured(prev, curr):
        changes = defaultdict(list)

        all_crns = prev.keys() | curr.keys()
        for crn in all_crns:
            o, n = prev.get(crn), curr.get(crn)
            if not o:
                changes[crn].append({'time': time_to_iso(n['datetime']), 'type': 'section update', 'message': "Section was added."})
            elif not n:
                changes[crn].append(
                    {'time': time_to_iso(o['datetime']), 'type': 'section update', 'message': "Section was removed.",
                     'value': o['enrolled'], 'capacity': o['max'], })
            else:
                dt = time_to_iso(n['datetime'])
                if o['teacher'] != n['teacher']:
                    changes[crn].append({'time': dt, 'type': 'teacher_change',
                                         'message': f"Teacher changed from {o['teacher']} to {n['teacher']}.",
                                         'old_teacher': o['teacher'], 'new_teacher': n['teacher'], })
                if o['enrolled'] != n['enrolled']:
                    crossed, percent = crossed_threshold(o['enrolled'], n['enrolled'], n['max'])
                    if crossed:
                        changes[crn].append({'time': dt, 'type': 'enrollment_milestone',
                                             'message': f"Enrollment crossed {percent}% ({n['enrolled']}/{n['max']}).",
                                             'percent': percent, 'value': n['enrolled'], 'capacity': n['max'] })
                if int(n['waitlisted']) > 10 and int(o['waitlisted']) < int(n['waitlisted']):
                    changes[crn].append({'time': dt, 'type': 'enrollment_milestone',
                                         'message': f"Waitlist exceeds 10: {n['waitlisted']}.",
                                         'value': n['waitlisted']})
        return changes

    def process_diff_timeline(path):
        snapshots = read_grouped_csv(path)
        timeline = sorted(snapshots.keys())
        timeline_diffs = []
        timeline_diffs_structured = []
        course_names = {}  # crn -> latest known course name

        for i in range(1, len(timeline)):
            prev_ts, curr_ts = timeline[i-1], timeline[i]
            prev, curr = snapshots[prev_ts], snapshots[curr_ts]

            # update course name map
            for crn, row in curr.items():
                course_names[crn] = row['course']

            delta = detect_changes(prev, curr)
            timeline_diffs.append(delta)

            delta_structured = detect_changes_structured(prev, curr)
            timeline_diffs_structured.append(delta_structured)

        # Flatten and group by crn
        crn_changes = defaultdict(list)
        for delta in timeline_diffs:
            for crn, changes in delta.items():
                crn_changes[crn].extend(changes)

        # Flatten and group by crn (structured)
        crn_changes_structured = defaultdict(list)
        for delta in timeline_diffs_structured:
            for crn, changes in delta.items():
                crn_changes_structured[crn].extend(changes)

        # Sort changes for each CRN by datetime
        for crn in crn_changes:
            crn_changes[crn].sort(key=lambda x: x[0])

        # Sort structured changes for each CRN by time
        for crn in crn_changes_structured:
            crn_changes_structured[crn].sort(key=lambda x: x['time'])

        return crn_changes, crn_changes_structured, course_names

    fresh_history = requests.get(f"http://gavilan.cc/schedule/reg_history_{term}.csv").text
    fresh_file = codecs.open(f'cache/reg_history_{term}.csv','w','utf-8')
    fresh_file.write(fresh_history)
    fresh_file.close()

    output1 = codecs.open(f'cache/reg_timeline_{term}.txt','w','utf-8')
    output2 = codecs.open(f'cache/reg_timeline_{term}.json','w','utf-8')
    changes, changes_structured, course_names = process_diff_timeline(f"cache/reg_history_{term}.csv")

    # once for plain text
    for crn in sorted(changes, key=lambda c: course_names.get(c, "")):
        course = course_names.get(crn, "")
        course_output = {'code': course, 'crn': crn, 'events': []}
        print(f"\n{course} (CRN {crn}):")
        output1.write(f"\n{course} (CRN {crn}):\n")
        for dt, msg in changes[crn]:
            print(f" [{dt}] {msg}")
            output1.write(f" [{dt}] {msg}\n")

            course_output['events'].append({'message': msg, 'time': time_to_iso(dt)})
    output1.close()

    # again for structured
    crn_list = []

    for crn in sorted(changes_structured, key=lambda c: course_names.get(c, "")):
        course = course_names.get(crn, "")
        course_output = {'code': course, 'crn': crn, 'events': changes_structured[crn]}
        crn_list.append(course_output)

    output2.write( json.dumps(crn_list, indent=2) )
    output2.close()


def recreate_all():
    # Use canonical semester short codes from semesters.py
    try:
        from semesters import code as SEM_CODES
    except Exception:
        SEM_CODES = []
    for x in SEM_CODES:
        try:
            recreate_reg_data(x)
        except Exception as e:
            print(f'Failed on {x} with: {e}')


def recreate_reg_data(term="fa25"):
    from collections import defaultdict
    from datetime import datetime

    def parse_row(row):
        dt = datetime.strptime(row['datetime'], "%Y-%m-%dT%H-%M")
        crn = row['crn']
        enrolled = int(row['enrolled'])
        return dt, row['datetime'], crn, enrolled

    def reduce_latest_per_day(rows):
        latest = defaultdict(dict)   # latest[crn][date] = (dt, ts, enrolled)
        latest_ts_by_date = {}       # date → (dt, ts) for header naming

        for row in rows:
            dt, full_ts, crn, enrolled = parse_row(row)
            date_str = dt.date().isoformat()
            ts_header = dt.strftime("%Y-%m-%dT%H")  # <-- this is what we want

            # for each crn, per day, keep the latest reading
            if date_str not in latest[crn] or dt > latest[crn][date_str][0]:
                latest[crn][date_str] = (dt, ts_header, enrolled)

            # also record the latest timestamp per day for consistent column headers
            if date_str not in latest_ts_by_date or dt > latest_ts_by_date[date_str][0]:
                latest_ts_by_date[date_str] = (dt, ts_header)

        return latest, [ts for _, ts in sorted(latest_ts_by_date.values())]

    def pivot_table(latest, headers):
        crns = sorted(latest)
        table = []

        for crn in crns:
            row = [crn]
            for ts in headers:
                date_str = ts[:10]  # match on YYYY-MM-DD
                val = latest[crn].get(date_str)
                if val and val[1] == ts:
                    row.append(str(val[2]))
                else:
                    row.append("")
            table.append(row)

        return ['crn'] + headers, table

    #with open(f"cache/reg_history_{term}.csv", newline='') as f:
    from io import StringIO
    csv_url = f"https://gavilan.cc/schedule/reg_history_{term}.csv"

    # Download
    resp = requests.get(csv_url)
    resp.raise_for_status()  # raises if bad status

    # Wrap the text in a file-like object
    f = StringIO(resp.text)

    fieldnames = ['datetime', 'crn', 'course', 'teacher', 'max', 'enrolled', 'waitlistmax', 'waitlisted']
    reader = csv.DictReader(f, fieldnames=fieldnames)
    rows = list(reader)

    latest, headers = reduce_latest_per_day(rows)
    header_row, table = pivot_table(latest, headers)

    with open(f"cache/reg_data_{term}.csv", "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header_row)
        writer.writerows(table)
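

# Illustrative shape of the cache/reg_data_<term>.csv written above: one row per
# CRN, one column per day, headed by that day's latest snapshot hour (values made up):
#
#   crn,2025-08-29T14,2025-08-30T14
#   10001,25,27
#   10002,12,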


if __name__ == "__main__":

    print('')
    options = { 1: ['Fetch rosters on schedule', fetch_current_rosters_auto],
                2: ['Get canvas data 2024 style', canvas_data_2024_run],
                3: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
                4: ['Narrative timeline of section updates', process_reg_history],
                5: ['Create narrative format all semesters', recreate_all],
                6: ['Recreate reg_data from full reg history', recreate_reg_data],
                7: ['Compute enrollment changes', compute_enrollment_changes],
              }

    '''1: ['Re-create schedule csv and json files from raw html',recent_schedules] ,
    2: ['Fetch rosters',fetch_current_rosters] ,
    3:
    4: ['Compute how registration is filling up classes', schedule_filling] ,
    5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
    6: ['Canvas data: interactive sync', interactive ],
    7: ['Canvas data: automated sync', sync_non_interactive ],
    8:
    9:
    16: ['Scrape schedule from ssb', scrape_schedule_multi ],
    14: ['Generate latestart schedule', list_latestarts ],
    15: ['Test ssb calls with python', scrape_schedule_py ],
    10: ['schedule to db', scrape_for_db ],
    11: ['clean argos draft schedule file', argos_data_from_cvc],
    12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
    13: ['Parse deanza schedule', dza_sched ],
    '''

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    # Call the chosen function from the options dict
    options[int(resp)][1]()

# Testing

#if __name__ == "__main__":
    #users = fetch('/api/v1/courses/69/users?per_page=100',1)
    #print "These are the users: "
    #print users

    #getSemesterSchedule()

    #get_doc()
    #pass