2133 lines
74 KiB
Python
2133 lines
74 KiB
Python
# Local data, saving and manipulating
|
|
|
|
import os, re, gzip, codecs, funcy, pytz, sqlite3, json, random, functools, requests, sys, csv
|
|
import pandas as pd
|
|
import numpy as np
|
|
from collections import defaultdict
|
|
from datetime import datetime as dt
|
|
from datetime import timedelta
|
|
from dateutil.parser import parse
|
|
from os.path import exists, getmtime
|
|
from pipelines import sync_non_interactive, url, header, gp, dean
|
|
|
|
#from courses import getCoursesInTerm
|
|
#from courses import user_in_depts_live
|
|
|
|
mycourses = {}
|
|
|
|
local_data_folder = 'cache/canvas_data/'
|
|
#sqlite_file = local_data_folder + 'data20230613.db' #'data_su20_4hr_blocks.db'
|
|
sqlite_file = local_data_folder + 'data.db' #'data_su20_4hr_blocks.db'
|
|
mylog = codecs.open(local_data_folder + 'canvas_data_log.txt','w')
|
|
|
|
thefiles_dat = {}
|
|
try:
|
|
for L in open('cache/canvas_data_index.txt','r').readlines():
|
|
L = L.strip()
|
|
(fname,start,finish) = L.split(',')
|
|
thefiles_dat[fname] = start
|
|
except Exception as e:
|
|
print("cache/canvas_data_index.txt was not found")
|
|
|
|
thefiles = open('cache/canvas_data_index_temp.txt','a') # rename me if nothing crashes :)
|
|
|
|
|
|
NUM_ONLY = 1 # use numeric codes instead of strings. For mathy stuff
|
|
|
|
requests_sum1_format = "id userid courseid timeblock viewcount partcount".split(" ")
|
|
requests_sum1_types = "INTEGER PRIMARY KEY AUTOINCREMENT,text,text,INTEGER,INTEGER,INTEGER".split(",")
|
|
requests_format = "id timestamp year month day userid courseid rootid course_acct_id quizid discussionid conversationid assignmentid url useragent httpmethod remoteip micros controller action contexttype contextid realid sessionid agentid httpstatus httpversion developer_key_id time_block".split(" ")
|
|
users_format = "id canvasid rootactid name tz created vis school position gender locale public bd cc state sortablename globalid".split(" ")
|
|
cc_format = "id canvasid userid address type position state created updated".split(" ")
|
|
term_format = "id canvasid rootid name start end sis".split(" ")
|
|
course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid schedule".split(" ")
|
|
role_format = "id canvas_id root_account_id account_id name base_role_type workflow_state created_at updated_at deleted_at".split(" ")
|
|
course_score_format = "s_id c_id a_id course_id enrol_id current final muted_current muted_final".split(" ")
|
|
course_section_dim_format = "id canvas_id name course_id enrollment_term_id default_section accepting_enrollments can_manually_enroll start_at end_at created_at workflow_state restrict_enrollments_to_section_dates nonxlist_course_id sis_source_id".split(" ")
|
|
enrollment_dim_format = "id cid root course_section role type workflow created updated start end complete self sis course_id user_id last_activity".split(" ")
|
|
communication_channel_dim_format = "id canvas_id user_id address type position workflow_state created_at updated_at".split(" ")
|
|
pseudonym_dim_format = "id canvas_id user_id account_id workflow_state last_request_at last_login_at current_login_at last_login_ip current_login_ip position created_at updated_at password_auto_generated deleted_at sis_user_id unique_name integration_id authentication_provider_id".split(" ")
|
|
conversation_dim_format = "id canvas_id has_media_objects subject course_id group_id account_id".split(" ")
|
|
conversation_message_dim_format = "id canvas_id conversation_id author_id created_at generated has_attachments has_media_objects body".split(" ")
|
|
|
|
|
|
|
|
|
|
|
|
unwanted_req_paths = """conversations/unread_count
|
|
CFNetwork
|
|
TooLegit
|
|
lti_user_id
|
|
brand_variables
|
|
dashboard-sidebar
|
|
dashboard_cards
|
|
ping
|
|
self/profile
|
|
login/oauth2
|
|
login/session_token
|
|
self/colors
|
|
self/profile
|
|
images/thumbnails
|
|
auth/login
|
|
auth/conversations
|
|
backup/login
|
|
blackboard ally
|
|
Proctorio
|
|
image_thumbnail
|
|
manifest.json
|
|
launch_definitions/login
|
|
login
|
|
python-requests
|
|
custom_data
|
|
content_shares
|
|
pandata_events
|
|
trypronto
|
|
users/self """.split("\n")
|
|
|
|
other_interesting_events = { }
|
|
|
|
DB_CON = 0
|
|
DB_CUR = 0
|
|
|
|
#########
|
|
######### LOCAL DB
|
|
#########
|
|
|
|
def db(file=sqlite_file):
|
|
global DB_CON, DB_CUR
|
|
if DB_CON:
|
|
return (DB_CON,DB_CUR)
|
|
print('grabbing db connection')
|
|
DB_CON = sqlite3.connect(file)
|
|
DB_CUR = DB_CON.cursor()
|
|
|
|
return (DB_CON, DB_CUR)
|
|
|
|
|
|
def setup_table(table='requests'):
|
|
(con,cur) = db()
|
|
q = ''
|
|
|
|
|
|
if table=='conversation':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS conversation (\n"
|
|
for L in conversation_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
|
|
if table=='conversation_message':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS conversation_message (\n"
|
|
for L in conversation_message_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
|
|
if table=='requests_sum1':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS requests_sum1 (\n"
|
|
for j, L in enumerate(requests_sum1_format):
|
|
if j:
|
|
(col,typ) = (L,requests_sum1_types[j])
|
|
q += ",\n\t%s %s" % (col,typ)
|
|
else:
|
|
(col,typ) = (L,requests_sum1_types[j])
|
|
q += "\t%s %s" % (col,typ)
|
|
|
|
q += "\n);\n"
|
|
print(q)
|
|
cur.execute(q)
|
|
|
|
q = "CREATE UNIQUE INDEX index1 ON requests_sum1(userid,courseid,timeblock);"
|
|
|
|
if table=='requests':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS requests (\n"
|
|
for L in open('cache/request_table.txt','r').readlines():
|
|
L = L.strip()
|
|
#print(L)
|
|
(col,type) = re.split("\s\s\s\s",L)
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='users':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS users (\n"
|
|
for L in users_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='pseudonym':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS pseudonym(\n"
|
|
for L in pseudonym_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
|
|
|
|
if table=='courses':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS courses (\n"
|
|
for L in course_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='course_sections':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS course_sections (\n"
|
|
for L in course_section_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='enrollment':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS enrollment (\n"
|
|
for L in enrollment_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='comm_channel':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS comm_channel (\n"
|
|
for L in communication_channel_dim_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='terms':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS terms (\n"
|
|
for L in term_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
q += "\n);"
|
|
|
|
if table=='roles':
|
|
first = 1
|
|
q = "CREATE TABLE IF NOT EXISTS roles (\n"
|
|
for L in role_format:
|
|
(col,type) = (L,'text')
|
|
if not first:
|
|
q += ",\n"
|
|
first = 0
|
|
q += "\t%s %s" % (col,type)
|
|
return q + "\n);"
|
|
if table == 'summary':
|
|
q = """CREATE TABLE "summary_course_user_views" (
|
|
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
"courseid" TEXT,
|
|
"course_canvasid" TEXT,
|
|
"username" TEXT,
|
|
"userid" TEXT,
|
|
"user_canvasid" TEXT,
|
|
"count" INTEGER,
|
|
"time_block" INTEGER )"""
|
|
if q:
|
|
print(q)
|
|
cur.execute(q)
|
|
con.commit()
|
|
return
|
|
|
|
if table == 'index':
|
|
for q in [ #'CREATE INDEX "idx_req_userid" ON "requests" ("id","courseid","userid" );',
|
|
'CREATE INDEX "idx_users_id" ON "users" ("id","canvasid" );',
|
|
'CREATE INDEX "idx_term_id" ON "terms" ("id","canvasid" );',
|
|
'CREATE INDEX "idx_enrollment" ON "enrollment" ("cid","course_id","user_id" );',
|
|
'CREATE INDEX "idx_courses" ON "courses" ("id","canvasid","termid" );' ]:
|
|
#print(q)
|
|
cur.execute(q)
|
|
con.commit()
|
|
|
|
# Help the next function to upload new users directly to conf database on gavilan.
|
|
def employees_refresh_flex(data):
|
|
try:
|
|
data['a'] = 'set/newuser'
|
|
data['sis_user_id'] = data['sis_user_id'][3:]
|
|
print("\nUploading this: \n")
|
|
print(json.dumps(data, indent=2))
|
|
print("\n")
|
|
a = input("Continue (y) or skip (n) ? ")
|
|
if a == 'y':
|
|
# This is what I was missing..........
|
|
# req.add_header("Content-type", "application/x-www-form-urlencoded")
|
|
r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
|
|
print(r3.text)
|
|
#print(r3.headers)
|
|
except Exception as ex:
|
|
print("Failed on: %s\nErr: %s" % (str(data),str(ex)))
|
|
|
|
|
|
|
|
# Everyone in iLearn DB with an xyz@gavilan.edu email address.
|
|
def all_gav_employees():
|
|
(connection,cursor) = db()
|
|
connection.row_factory = dict_factory
|
|
q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
|
|
h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
|
|
p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
|
|
JOIN comm_channel AS h ON u.id=h.user_id
|
|
JOIN pseudonym AS p ON p.user_id=u.id
|
|
WHERE h.address LIKE "%@gavilan.edu"
|
|
ORDER BY u.sortablename"""
|
|
cursor = connection.cursor()
|
|
cursor.execute(q)
|
|
everyone = cursor.fetchall()
|
|
everyone_set = set()
|
|
for E in everyone:
|
|
try:
|
|
everyone_set.add( E['address'].lower() )
|
|
except Exception as e:
|
|
print("Exception: %s\nwith: %s" % (str(e), str(E)))
|
|
|
|
oo = open('cache/temp1.txt','w')
|
|
oo.write(json.dumps(list(everyone_set), indent=2))
|
|
existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
|
|
ex = json.loads( existing.text )
|
|
already_enrolled = set()
|
|
for usr in ex['users']:
|
|
try:
|
|
#already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
|
|
already_enrolled.add( usr['email'].lower() )
|
|
except Exception as e:
|
|
print("Exception: %s\nWith: %s" % (str(e),str(usr)))
|
|
|
|
oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
|
|
oo.write(json.dumps(list(already_enrolled), indent=2))
|
|
|
|
# conf_users wants: goo, email, name, active
|
|
# and emails have random capitalization
|
|
# name is First Last, and sometimes with Middle in there.
|
|
#
|
|
|
|
# using sets: to_enroll = [ x for x in students if x not in already_enrolled ]
|
|
new_emp = [ x for x in everyone_set if x not in already_enrolled ]
|
|
|
|
# take the all_employee list, filter -> anyone who's in 'existing' is removed
|
|
|
|
# funcy.where( lambda x: x['email'] == ae[4] , existing )
|
|
|
|
#new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
|
|
#new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu')) #ae['email'] ))
|
|
print(new_emp)
|
|
oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
|
|
oo.write(json.dumps(list(new_emp), indent=2))
|
|
|
|
# Now, iLearn db (everyone)... find the rows that match the email addresses
|
|
# that we've decided we need to add (new_emp)
|
|
|
|
#print(everyone)
|
|
#print( "searching for %s" % j )
|
|
#print( "searched for %s, found: %s" % (j, str(to_add) ))
|
|
#print("\nUploading...\n")
|
|
for j in new_emp:
|
|
#j = new_emp[0]
|
|
print(j)
|
|
to_add = list(funcy.where( everyone, address=j ))
|
|
if to_add:
|
|
employees_refresh_flex(to_add[0])
|
|
else:
|
|
print("Didn't find an entry for that account.")
|
|
print("done uploading")
|
|
|
|
#
|
|
def teachers_courses_semester():
|
|
q = """SELECT c.id, c.canvasid AS course_cid, c.name, c.code, u.name, u.sortablename, u.canvasid AS user_cid FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.sis LIKE "202070-%"
|
|
AND NOT c.state="deleted"
|
|
AND e."type"="TeacherEnrollment"
|
|
ORDER BY u.sortablename"""
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
all_teachers = cursor.fetchall()
|
|
return all_teachers
|
|
#
|
|
def teachers_by_term():
|
|
q = """SELECT c.id as course_id, c.canvasid as course_c_id, c.name, c.code, c.created as course_created, c.start, c.visible, c.state, e.last_activity,
|
|
u.id as user_id, u.canvasid as user_c_id, u.sortablename, u.created as user_created
|
|
FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.sis LIKE "202070%"
|
|
AND e."type"="TeacherEnrollment"
|
|
ORDER BY c.code"""
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
all_teachers = cursor.fetchall()
|
|
|
|
|
|
|
|
# Report for AEC
|
|
def aec_su20_report():
|
|
global mycourses
|
|
#AE 600 (80040; 80045; 80047) 10945
|
|
#AE 602 (80048; 80049; 80050) 10746
|
|
#AE 636 (80332; 80381) 10783
|
|
#CSIS 571A (80428) 10956
|
|
#GUID 558A (80429) 10957
|
|
import csv
|
|
|
|
course_id = "10957"
|
|
course_label = "GUID 558A 80429"
|
|
|
|
(connection,cursor) = db()
|
|
sections = "10945 10746 10783 10956 10957".split(" ")
|
|
|
|
for course_id in sections:
|
|
if 0:
|
|
for course_id in sections:
|
|
q = """SELECT c.code, u.sortablename, c.id, e.user_id,
|
|
c.canvasid FROM courses AS c JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s""" % course_id
|
|
cursor.execute(q)
|
|
|
|
for row in cursor:
|
|
print(row)
|
|
mycourses[row[2]] = ''
|
|
return
|
|
|
|
|
|
grp_sum_qry = """SELECT u.sortablename, r.timeblock, SUM(r.viewcount), u.canvasid AS user, c.canvasid
|
|
FROM requests_sum1 AS r
|
|
JOIN courses AS c ON r.courseid=c.id
|
|
JOIN enrollment as e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=r.userid
|
|
WHERE c.canvasid=%s
|
|
GROUP BY r.userid,c.id,r.timeblock
|
|
ORDER BY u.sortablename ,r.timeblock """ % course_id
|
|
|
|
cursor.execute( grp_sum_qry )
|
|
with codecs.open("cache/aec_%s.csv" % course_id, "w", "utf-8") as write_file:
|
|
c_out = csv.writer(write_file)
|
|
c_out.writerow( ['name','timeblock','viewcount','timestamp','minutes'] )
|
|
|
|
rows = [list(row) for row in cursor]
|
|
print("Got %i records" % len(rows))
|
|
compressed_rows = []
|
|
|
|
last_timeblock = -1
|
|
last_R = []
|
|
current_minute = 0
|
|
current_name = ""
|
|
uptodate = 1
|
|
for R in rows:
|
|
print(" %s\t%s " % (R[0], current_name) )
|
|
if R[0] != current_name:
|
|
if not uptodate:
|
|
last_R.append(current_minute)
|
|
last_R.pop(1)
|
|
last_R.pop(2)
|
|
last_R.pop(2)
|
|
compressed_rows.append(last_R)
|
|
uptodate = 1
|
|
last_timeblock = -1
|
|
last_R = []
|
|
current_minute = 0
|
|
current_name = R[0]
|
|
|
|
if R[2] < 3: continue
|
|
if R[1] != last_timeblock+1 and len(last_R):
|
|
# non contiguous timeblock, save the last row and reset counters
|
|
last_timeblock = R[1]
|
|
|
|
R.append( str(dt_from_timeblock( R[1] )) )
|
|
|
|
last_R.append(current_minute)
|
|
current_minute = 15
|
|
|
|
#last_R.pop(1)
|
|
last_R.pop(3)
|
|
last_R.pop(3)
|
|
|
|
compressed_rows.append(last_R) # makes a copy of list. dunno if thats necessary
|
|
#print(last_R)
|
|
|
|
last_R = R
|
|
uptodate = 1
|
|
else:
|
|
# contiguous or first timeblock
|
|
current_minute += 15
|
|
last_timeblock = R[1]
|
|
if len(last_R):
|
|
last_R[2] = int(last_R[2]) + int(R[2]) # add the views
|
|
# its contiguous, so we already have a last_R we're building on
|
|
else:
|
|
last_R = R[:] # clone it.
|
|
uptodate = 0
|
|
if not uptodate:
|
|
last_R.append(current_minute)
|
|
last_R.pop(1)
|
|
last_R.pop(2)
|
|
last_R.pop(2)
|
|
compressed_rows.append(last_R)
|
|
|
|
|
|
for R in compressed_rows:
|
|
c_out.writerow(R)
|
|
|
|
# Build up a report for everyone
|
|
outfile = codecs.open('cache/positive_attendance_%s.csv' % course_id , 'w', 'utf-8')
|
|
pa_out = csv.writer(outfile)
|
|
pa_out.writerow( ['name','date','viewcount','minutes'] )
|
|
|
|
people = funcy.group_by(lambda x: x[0], compressed_rows)
|
|
for P in people:
|
|
if P in ['Ally','Burgman, Lorraine','Horta, Gilbert','Mendez, Frank','Student, Test']:
|
|
continue
|
|
outrows = [ [P,''] ]
|
|
try:
|
|
|
|
#print(P)
|
|
#print(people[P])
|
|
for x in people[P][1:]:
|
|
outrows.append( [ '', x[3], x[2],x[4] ] )
|
|
mins = list(map( lambda x: x[4], people[P][1:]))
|
|
print(mins)
|
|
total_min = functools.reduce( lambda x, y: int(x)+int(y), mins)
|
|
outrows.append( ['Total minutes', total_min] )
|
|
print("Total minutes is %i." % total_min)
|
|
hours = total_min / 60.0
|
|
outrows.append( ['Total hours', hours] )
|
|
print("Total hours is %0.1f." % hours)
|
|
outrows.append( [] )
|
|
outrows.append( [] )
|
|
|
|
for x in outrows:
|
|
print(x)
|
|
pa_out.writerow(x)
|
|
except Exception as e:
|
|
print("Some sort of error: %s" % str(e))
|
|
|
|
|
|
|
|
connection.close()
|
|
print("Wrote output file to: %s" % "cache/aec_%s.csv" % course_label)
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
HELPERS
|
|
|
|
Whos in a course?
|
|
SELECT * FROM enrollment as e JOIN courses AS c ON e.course_id=c.id WHERE c.canvasid=10957 ; AND c.worflow=active
|
|
|
|
"""
|
|
|
|
|
|
##########
|
|
##########
|
|
########## JUST LOADING FROM FILE
|
|
##########
|
|
##########
|
|
|
|
|
|
######################
|
|
|
|
# Return the most up do date version of the given file. Useful for 'dimensions'.
|
|
def most_recent_file_of( target ):
|
|
|
|
def finder(st):
|
|
return re.search(target,st)
|
|
|
|
all = os.listdir(local_data_folder)
|
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
all.reverse()
|
|
all = list(funcy.filter( finder, all ))
|
|
|
|
#print("file list is: " + str(all))
|
|
if not all:
|
|
return ''
|
|
return all[0]
|
|
|
|
# Given a table schema, parse log file, return a list of dicts. Optionally remove some columns.
|
|
def parse_file_with( file, format, with_gid=0 ):
|
|
if not file: return []
|
|
all_users = []
|
|
for line in gzip.open(local_data_folder + file,'r'):
|
|
line = line.strip()
|
|
line_dict = dict(list(zip(format, line.decode('utf-8').split("\t"))))
|
|
if with_gid: line_dict['globalid'] = line_dict['globalid'].rstrip()
|
|
|
|
remove = []
|
|
for k,v in line_dict.items():
|
|
if v == '\\N' or v == b'\\N': remove.append(k)
|
|
for k in remove: line_dict.pop(k, None)
|
|
all_users.append(line_dict)
|
|
return all_users
|
|
|
|
|
|
# I keep my own cache.
|
|
# I return a list of the read lines if the log dates in the file are within dates (top of this file), or FALSE
|
|
def is_requestfile_interesting(fname):
|
|
global thefiles, thefiles_dat
|
|
#begin_month = ['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07']
|
|
#begin_month = ['2020-09','2020-10','2020-08']
|
|
begin_month = ['2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07']
|
|
|
|
first = {}
|
|
lines = False
|
|
if fname in thefiles_dat:
|
|
f_date = parse(thefiles_dat[fname])
|
|
#print("\t\t+ %s" % str(f_date))
|
|
first = {'year':str(f_date.year), 'month':"%i-%02i" % (f_date.year,f_date.month) }
|
|
#print("\t\t- %s" % str(first))
|
|
#print("\t\t* From: %s (%s)" % (first['month'], thefiles_dat[fname]) )
|
|
print("+ %s" % first['month'])
|
|
else:
|
|
filei = 0
|
|
#thefiles.write(fname + ',')
|
|
|
|
g_file = gzip.open(local_data_folder+fname,'r')
|
|
lines = g_file.readlines()
|
|
|
|
last = 0
|
|
i = 0
|
|
j = -1
|
|
while not last:
|
|
last = requests_line(lines[i].decode('utf-8'))
|
|
i += 1
|
|
first = 0
|
|
while not first:
|
|
first = requests_line(lines[j].decode('utf-8'))
|
|
j -= 1
|
|
|
|
print("- %s" % first['month'])
|
|
|
|
thefiles.write(fname + "," + str(first['date']) + ',' + str(last['date']) + '\n')
|
|
thefiles.flush()
|
|
|
|
# TODO more robust here
|
|
if first['month'] in begin_month:
|
|
print("++ Using it.")
|
|
if lines: return lines
|
|
return gzip.open(local_data_folder+fname,'r').readlines()
|
|
return False
|
|
|
|
|
|
# This is it:
|
|
# e670d58a-25cb-4666-9675-8438615a5a4a 2019-05-18 13:01:03.558 2019 2019-05 2019-05-18 -256911301467799527 94250000000003187 94250000000000001 94250000000000001 \N \N \N \N /api/v1/courses/3187/assignments?page=4573781&per_page=30 Java/1.8.0_191 GET 35.173.111.106 81639 assignments_api index Course 3187 \N 6dad4c59c75a3492b830fb3b1136e1bc -553092862543029181 200 HTTP/1.1 170000000000376
|
|
|
|
|
|
# TODO - investigate pearson, developer key: 170000000000376 and their ridiculous amounts of hits.
|
|
# and all these others: https://ilearn.gavilan.edu/accounts/1/developer_keys
|
|
|
|
#from dateutil.relativedelta import relativedelta
|
|
#diff = relativedelta(start, ends)
|
|
|
|
secs_in_a_24hr_block = 60 * 60 * 24 # 24 HOUR BLOCK
|
|
secs_in_a_4hr_block = 60 * 60 * 4 # 4 HOUR BLOCK
|
|
secs_in_a_block = 60 * 15 # 15 MINUTE BLOCK
|
|
start_of_time = '2020-08-23 00:00:00'
|
|
|
|
# Why is this 7 minutes off?
|
|
# start = dt.strptime(start_of_time, '%Y-%m-%d %H:%M:%S').replace(tzinfo=pytz.timezone('US/Pacific'))
|
|
|
|
pst = pytz.timezone('US/Pacific')
|
|
start = pst.localize(dt.strptime(start_of_time, '%Y-%m-%d %H:%M:%S'))
|
|
start_seconds = start.timestamp()
|
|
|
|
# epoch slot: imagine time starts on Jan 1 of 20xx, and is counted off in xxxxxxxxxxxxx4 hour slots, so
|
|
# xxxxxx time 0 = jan 1, 12am - 3:59am, time 1 = 4am - 8am,... and so on.
|
|
# xxxxxx So there's 6 of these per day.
|
|
#
|
|
# In this version I'm doing 15 minute slots, 4 per hour, 96 per day.
|
|
#
|
|
# Return a 'timeblock'. An integer number of 15 minute blocks from my epoch. Expects a datetime object in PST timezone.
|
|
def timeblock_from_dt(dt_obj):
|
|
global start, start_seconds
|
|
secs = dt_obj.timestamp() - start_seconds
|
|
return int( secs / secs_in_a_block )
|
|
|
|
# Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
|
|
def dt_from_timeblock(tb):
|
|
delta = timedelta(seconds=tb*secs_in_a_block)
|
|
return start + delta
|
|
|
|
####
|
|
# Twenty Four hour timeblocks
|
|
def timeblock_24hr_from_dt(dt_obj):
|
|
global start, start_seconds
|
|
secs = dt_obj.timestamp() - start_seconds
|
|
return int( secs / secs_in_a_24hr_block )
|
|
|
|
# Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
|
|
def dt_from_24hr_timeblock(tb):
|
|
delta = timedelta(seconds=tb*secs_in_a_24hr_block)
|
|
return start + delta
|
|
|
|
|
|
|
|
####
|
|
# Four hour timeblocks
|
|
def timeblock_4hr_from_dt(dt_obj):
|
|
global start, start_seconds
|
|
secs = dt_obj.timestamp() - start_seconds
|
|
return int( secs / secs_in_a_4hr_block )
|
|
|
|
# Returns a time in PST, given a 'timeblock'. Will be used in translating back to human time
|
|
def dt_from_4hr_timeblock(tb):
|
|
delta = timedelta(seconds=tb*secs_in_a_4hr_block)
|
|
return start + delta
|
|
|
|
|
|
# I make the line into a dict, erase keys with no data, make a DT field called date, make a time_block (int) field.
|
|
def requests_line(line,i=0):
|
|
L = line # strip?
|
|
if type(L) == type(b'abc'): L = line.decode('utf-8')
|
|
line_parts = L.split("\t")
|
|
for pattern in unwanted_req_paths:
|
|
if pattern in L:
|
|
return 0
|
|
d = dict(list(zip(requests_format, L.split("\t"))))
|
|
remove = []
|
|
for k,v in d.items():
|
|
if v == '\\N' or v == b'\\N': remove.append(k)
|
|
for k in remove: d.pop(k, None)
|
|
d['date'] = dt.strptime( d['timestamp'], "%Y-%m-%d %H:%M:%S.%f" )
|
|
d['date'] = d['date'].replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific'))
|
|
d['time_block'] = timeblock_from_dt(d['date'])
|
|
#if i % 1000 == 1: print(d)
|
|
return d
|
|
|
|
import time
|
|
|
|
# Take all the requests.gz files and index them in some useful fashion.
|
|
# Bulk insert of requests logs. Too much data to be useful.
|
|
def requests_file(fname_list):
|
|
global mycourses
|
|
samples = codecs.open('cache/request_samples.txt', 'a', 'utf-8')
|
|
conn,cur = db()
|
|
|
|
RESUME = 610
|
|
|
|
folderi = 0
|
|
filei = 0
|
|
last_time = time.process_time()
|
|
|
|
q = "INSERT INTO requests_sum1 (userid, courseid, timeblock, viewcount) VALUES (?,?,?,?) ON CONFLICT (userid,courseid,timeblock) DO UPDATE SET viewcount=viewcount+1"
|
|
|
|
for fname in fname_list:
|
|
#if folderi > 2: return
|
|
print("\n%i\t%s \t" % (folderi, fname), end='', flush=True)
|
|
folderi += 1
|
|
if folderi < RESUME:
|
|
continue
|
|
filei = 0
|
|
|
|
try:
|
|
lines = is_requestfile_interesting(fname)
|
|
if lines:
|
|
vals_cache = []
|
|
for L in lines:
|
|
thisline = requests_line(L,filei) #TODO select if timeblock exists
|
|
if not thisline:
|
|
continue
|
|
if random.random() > 0.99999:
|
|
#L = str(L)
|
|
if type(L) == type(b'abc'): L = L.decode('utf-8')
|
|
parts = L.split('\t')
|
|
if len(parts)>17:
|
|
samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
|
|
|
|
if not 'courseid' in thisline: continue
|
|
if not 'userid' in thisline: continue
|
|
|
|
v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
|
|
vals_cache.append( [ str(x) for x in v ] )
|
|
if filei % 5000 == 0:
|
|
conn.executemany(q, vals_cache)
|
|
conn.commit()
|
|
t = time.process_time()
|
|
delta = t - last_time
|
|
last_time = t
|
|
print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
|
|
samples.flush()
|
|
filei += 1
|
|
# do the commit on the entire file...
|
|
conn.executemany(q, vals_cache)
|
|
conn.commit()
|
|
t = time.process_time()
|
|
delta = t - last_time
|
|
last_time = t
|
|
print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
|
|
except Exception as e:
|
|
print(e)
|
|
print("Failed on: %s" % fname)
|
|
|
|
# Insert or update a request line.
|
|
def upsert_request(line, vals):
|
|
# "id userid courseid timeblock viewcount partcount"
|
|
|
|
# is it a view or a participation?
|
|
q = "INSERT INTO requests_sum1 (userid, courseid, timeblock, viewcount) VALUES ('%s','%s',%s,%s) ON CONFLICT (userid,courseid,timeblock) DO UPDATE SET viewcount=viewcount+1" % ( str(vals[0]), str(vals[1]), str(vals[2]), str(vals[3]) )
|
|
return q
|
|
|
|
|
|
|
|
# Generic insert of a dict into a table. Keys of dict must match table columns.
|
|
def dict_to_insert(thisline,table): # a dict
|
|
vals = []
|
|
v_str = ''
|
|
first = 1
|
|
q = "INSERT INTO %s (" % table
|
|
|
|
for k in thisline.keys():
|
|
#print(k)
|
|
if k == 'date': continue
|
|
if not first:
|
|
q += ","
|
|
v_str += ","
|
|
q += k
|
|
v_str += "?"
|
|
vals.append(str(thisline[k]))
|
|
first = 0
|
|
q += ") VALUES (" + v_str + ")"
|
|
return q, vals
|
|
|
|
# This and the following merge functions do direct inserts without further tallying.
|
|
# This now does tallying by timeblock.
|
|
def merge_requests():
|
|
req = [x for x in os.listdir(local_data_folder) if 'requests' in x]
|
|
print(f"Checking {len(req)} request log files.")
|
|
requests_file(req)
|
|
#i = 0
|
|
#max = 20000
|
|
|
|
#for f in os.listdir(local_data_folder):
|
|
# if re.search(r'requests',f) and i < max:
|
|
# req.append(f)
|
|
# i += 1
|
|
|
|
def merge_comm_channel():
|
|
setup_table('comm_channel')
|
|
(conn,cur) = db()
|
|
count = 0
|
|
|
|
cfile = most_recent_file_of('communication_channel_dim')
|
|
cm = parse_file_with( cfile, communication_channel_dim_format)
|
|
for U in cm:
|
|
q,v = dict_to_insert(U,'comm_channel')
|
|
try:
|
|
cur.execute(q,v)
|
|
count += 1
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
print("Processed %i comm channel entries" % count)
|
|
|
|
|
|
def merge_pseudonym():
|
|
setup_table('pseudonym')
|
|
(conn,cur) = db()
|
|
count = 0
|
|
|
|
cfile = most_recent_file_of('pseudonym_dim')
|
|
cm = parse_file_with( cfile, pseudonym_dim_format)
|
|
for U in cm:
|
|
q,v = dict_to_insert(U,'pseudonym')
|
|
try:
|
|
cur.execute(q,v)
|
|
count += 1
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
print("Processed %i pseudonym entries" % count)
|
|
|
|
|
|
|
|
|
|
def merge_users():
|
|
setup_table('users')
|
|
(conn,cur) = db()
|
|
|
|
user_file = most_recent_file_of('user_dim')
|
|
users = parse_file_with( user_file, users_format)
|
|
for U in users:
|
|
q,v = dict_to_insert(U,'users')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
def merge_courses():
|
|
setup_table('courses')
|
|
(conn,cur) = db()
|
|
|
|
c_file = most_recent_file_of('course_dim')
|
|
courses = parse_file_with( c_file, course_format)
|
|
for U in courses:
|
|
q,v = dict_to_insert(U,'courses')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
def merge_course_sections():
|
|
setup_table('course_sections')
|
|
(conn,cur) = db()
|
|
|
|
c_file = most_recent_file_of('course_section_dim')
|
|
c_sections = parse_file_with( c_file, course_section_dim_format)
|
|
count = 0
|
|
for U in c_sections:
|
|
q,v = dict_to_insert(U,'course_sections')
|
|
count += 1
|
|
#if count % 1000 == 0:
|
|
# print( "%i - " % count + q + " " + str(v) )
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
print("Processed %i course sections" % count)
|
|
|
|
def merge_enrollment():
|
|
setup_table('enrollment')
|
|
(conn,cur) = db()
|
|
|
|
c_file = most_recent_file_of('enrollment_dim')
|
|
print("Using enrollments from: %s" % c_file)
|
|
courses = parse_file_with( c_file, enrollment_dim_format)
|
|
count = 0
|
|
for U in courses:
|
|
q,v = dict_to_insert(U,'enrollment')
|
|
count += 1
|
|
#if count % 1000 == 0:
|
|
# print( "%i - " % count + q + " " + str(v) )
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
print("Processed %i enrollments" % count)
|
|
|
|
|
|
def merge_term():
|
|
setup_table('terms')
|
|
(conn,cur) = db()
|
|
|
|
c_file = most_recent_file_of('enrollment_term_dim')
|
|
courses = parse_file_with( c_file, term_format)
|
|
for U in courses:
|
|
q,v = dict_to_insert(U,'terms')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
def merge_roles():
|
|
(conn,cur) = db()
|
|
cur.execute(setup_table('roles'))
|
|
conn.commit()
|
|
|
|
c_file = most_recent_file_of('role_dim')
|
|
courses = parse_file_with( c_file, role_format)
|
|
for U in courses:
|
|
q,v = dict_to_insert(U,'roles')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
def merge_convos():
|
|
setup_table('conversation')
|
|
setup_table('conversation_message')
|
|
|
|
(conn,cur) = db()
|
|
c_file = most_recent_file_of('conversation_dim')
|
|
ccc = parse_file_with( c_file, conversation_dim_format)
|
|
for U in ccc:
|
|
q,v = dict_to_insert(U,'conversation')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
c_file = most_recent_file_of('conversation_message_dim')
|
|
ccc = parse_file_with( c_file, conversation_message_dim_format)
|
|
for U in ccc:
|
|
q,v = dict_to_insert(U,'conversation_message')
|
|
try:
|
|
cur.execute(q,v)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
# For returning sqlite results as dicts
|
|
def dict_factory(cursor, row):
|
|
d = {}
|
|
for idx, col in enumerate(cursor.description):
|
|
d[col[0]] = row[idx]
|
|
return d
|
|
|
|
|
|
# TODO... approaches to all this data... list requests in order descending time, unique users, and just
|
|
# file stats on them...?
|
|
|
|
# people's maxs, with time block window:
|
|
# select *,count(course_canvasid),sum(count),max(time_block),min(time_block) from summary_course_user_views group by username order by min(time_block)
|
|
|
|
# get the time back: dt_from_timeblock(11296)
|
|
|
|
|
|
# Attempt to do tallying
|
|
def make_views_summarys():
|
|
connection = sqlite3.connect(sqlite_file)
|
|
connection.row_factory = dict_factory
|
|
cursor = connection.cursor()
|
|
|
|
q1 = """select courses.id, courses.code, courses.name, courses.visible, courses.state, courses.sis from courses
|
|
join terms on courses.termid=terms.id
|
|
where terms.name="2021 Spring" and courses.state="available";
|
|
"""
|
|
|
|
cursor.execute(q1)
|
|
sp2020_courses = cursor.fetchall()
|
|
#print(json.dumps(sp2020_courses,indent=2))
|
|
|
|
print("Summarizing views... ", end='')
|
|
for C in sp2020_courses:
|
|
print("%s, " % C['name'], end='', flush=True)
|
|
|
|
#if input('enter to go, q to quit') == 'q': break
|
|
q2 = """select sum(requests_sum1.viewcount) as views, requests_sum1.timeblock as block, courses.code, courses.canvasid as ccid,
|
|
users.name, users.id, users.canvasid from requests_sum1
|
|
join users on users.id = requests_sum1.userid
|
|
join courses on courses.id=requests_sum1.courseid
|
|
where courses.id="%s"
|
|
group by users.name, block """ % C['id']
|
|
cursor.execute(q2)
|
|
views = cursor.fetchall()
|
|
#print(json.dumps(views,indent=2))
|
|
for U in views:
|
|
q3 = """INSERT INTO summary_course_user_views ("courseid","course_canvasid", "username","userid","user_canvasid","count","time_block") VALUES (?,?,?,?,?,?,?);"""
|
|
vals = [C['id'], U['ccid'], U['name'], U['id'], U['canvasid'], U['views'], U['block']]
|
|
#print( q3 )
|
|
#print( vals )
|
|
#print('')
|
|
cursor.execute(q3,vals)
|
|
connection.commit()
|
|
connection.close()
|
|
|
|
# original without time_blocks info.
|
|
def make_views_summarys_v1():
|
|
connection = sqlite3.connect(sqlite_file)
|
|
connection.row_factory = dict_factory
|
|
cursor = connection.cursor()
|
|
|
|
q1 = """select courses.id, courses.code, courses.name, courses.visible, courses.state, courses.sis from courses
|
|
join terms on courses.termid=terms.id
|
|
where terms.name="2020 Spring " and courses.state="available";
|
|
"""
|
|
|
|
cursor.execute(q1)
|
|
sp2020_courses = cursor.fetchall()
|
|
#print(json.dumps(sp2020_courses,indent=2))
|
|
|
|
for C in sp2020_courses:
|
|
print("Summarizing views for " + C['name'])
|
|
|
|
#if input('enter to go, q to quit') == 'q': break
|
|
q2 = """select count(requests.id) as views, courses.code, courses.canvasid as ccid, users.name, users.id, users.canvasid from requests
|
|
join users on users.id = requests.userid
|
|
join courses on courses.id=requests.courseid
|
|
where requests.courseid="%s"
|
|
group by users.name;""" % C['id']
|
|
cursor.execute(q2)
|
|
views = cursor.fetchall()
|
|
#print(json.dumps(views,indent=2))
|
|
for U in views:
|
|
q3 = """INSERT INTO summary_course_user_views ("courseid","course_canvasid", "username","userid","user_canvasid","count") VALUES (?,?,?,?,?,?);"""
|
|
vals = [C['id'], U['ccid'], U['name'], U['id'], U['canvasid'], U['views'] ]
|
|
print( q3 )
|
|
print( vals )
|
|
print('')
|
|
cursor.execute(q3,vals)
|
|
connection.commit()
|
|
connection.close()
|
|
|
|
|
|
# Setup my basic db stats base from scratch
|
|
def full_reload():
|
|
|
|
path = "cache/canvas_data/"
|
|
file = "data.db"
|
|
if exists(path + file):
|
|
time = date_time = dt.fromtimestamp( getmtime(path + file) )
|
|
newname = 'data'+ time.strftime('%Y%m%d') + ".db"
|
|
print("renaming old data file to %s" % newname)
|
|
try:
|
|
os.rename(path+file, path + newname)
|
|
except Exception as e:
|
|
print("Couldn't rename file:", str(e))
|
|
|
|
sync_non_interactive()
|
|
|
|
setup_table('requests_sum1')
|
|
setup_table('courses')
|
|
setup_table('users')
|
|
setup_table('roles')
|
|
setup_table('enrollment')
|
|
setup_table('terms')
|
|
setup_table('conversation')
|
|
setup_table('conversation_message')
|
|
setup_table('summary')
|
|
setup_table('index')
|
|
|
|
|
|
|
|
|
|
|
|
merge_users()
|
|
merge_comm_channel()
|
|
merge_convos()
|
|
merge_courses()
|
|
merge_pseudonym()
|
|
merge_enrollment()
|
|
merge_term()
|
|
merge_roles()
|
|
merge_course_sections()
|
|
|
|
#merge_requests()
|
|
|
|
#make_views_summarys()
|
|
sched_to_db()
|
|
|
|
def guess_dept(t):
|
|
#print(t)
|
|
method = 1 # crosslisted courses get their own dept
|
|
method = 2 # xlisted takes dept first listed
|
|
|
|
if method==1:
|
|
p = "^([A-Z/]+)\d+"
|
|
m = re.search(p, t['code'])
|
|
if m:
|
|
return m.group(1)
|
|
return '?'
|
|
if method==2:
|
|
p = "^([A-Z]+)[\d/]+"
|
|
m = re.search(p, t['code'])
|
|
if m:
|
|
return m.group(1)
|
|
return '?'
|
|
|
|
|
|
# Main view of all class / all user overview...
|
|
def dept_with_studentviews(dept="", sem=''):
|
|
if not sem:
|
|
sem = input("which semester? (ex: 2020 Fall) ")
|
|
|
|
connection = sqlite3.connect(sqlite_file)
|
|
connection.row_factory = dict_factory
|
|
cursor = connection.cursor()
|
|
|
|
q1 = """select courses.id, courses.canvasid, courses.code, courses.name, courses.visible, courses.state, courses.sis from courses
|
|
join terms on courses.termid=terms.id
|
|
where terms.name="%s" and courses.state="available" """ % sem
|
|
if dept:
|
|
q1 += " AND courses.code LIKE '%" + dept + "%';"
|
|
|
|
print(q1)
|
|
cursor.execute(q1)
|
|
courses = cursor.fetchall()
|
|
return courses
|
|
#print(json.dumps(sp2020_courses,indent=2))
|
|
|
|
# version 1 of this got as high as 208 MB. Removed names, other unused columns.
|
|
|
|
qry = "select suv.user_canvasid, suv.courseid, suv.count, suv.time_block, courses.code from summary_course_user_views as suv join courses on courses.id=suv.courseid where suv.courseid=%s"
|
|
|
|
if dept == 'all':
|
|
views_records = list( funcy.flatten( [ cursor.execute(qry% x['id']).fetchall() for x in sp2020_courses ] ) )
|
|
by_course = funcy.group_by( lambda x: x['code'], views_records)
|
|
for k,v in by_course.items():
|
|
by_course[k] = funcy.group_by( lambda x: x['user_canvasid'], v)
|
|
return by_course
|
|
|
|
|
|
def f(x):
|
|
return x['code']
|
|
this_dept = filter( lambda x: guess_dept(x)==dept, sp2020_courses )
|
|
|
|
|
|
views_records = list( funcy.flatten( [ cursor.execute(qry% x['id']).fetchall() for x in this_dept ] ) )
|
|
|
|
return funcy.group_by( lambda x: x['courseid'], views_records)
|
|
return "Couldn't find that department: %s" % dept
|
|
|
|
|
|
def get_courses_in_term_local(term="172"):
|
|
q = """SELECT c.code, c.name, c.state, c.canvasid, c.id FROM courses AS c JOIN terms AS t ON c.termid=t.id WHERE t.canvasid==%s""" % term
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
allrows = cursor.fetchall()
|
|
return allrows
|
|
|
|
# get student count
|
|
def course_student_stats(canvasid):
|
|
q = """SELECT u.name FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s
|
|
AND e.type="StudentEnrollment"
|
|
AND e.workflow="active" """ % (canvasid)
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
allrows = cursor.fetchall()
|
|
a = [ len(allrows), ]
|
|
b = []
|
|
for x in allrows: b.append(x[0])
|
|
return [a,b]
|
|
return [x[0] for x in allrows]
|
|
|
|
# get enrollments in a course, name and canvasid
|
|
def get_course_enrollments(courseid):
|
|
q = """SELECT u.name, u.canvasid FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s
|
|
AND e.type="StudentEnrollment"
|
|
AND e.workflow="active";""" % str(courseid)
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
allrows = cursor.fetchall()
|
|
return allrows
|
|
|
|
|
|
# get teacher name from local db
|
|
def course_quick_stats(canvasid):
|
|
q = """SELECT c.id AS courseid, c.code, tt.name, c.state, COUNT(u.id) AS student_count FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
JOIN (
|
|
SELECT c.id AS courseid, u.id AS userid, c.code, u.name FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid=%s
|
|
AND e."type"="TeacherEnrollment"
|
|
) AS tt ON c.id=tt.courseid
|
|
WHERE c.canvasid=%s
|
|
AND e."type"="StudentEnrollment"
|
|
GROUP BY c.code
|
|
ORDER BY c.code""" % (canvasid,canvasid)
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
allrows = cursor.fetchall()
|
|
return allrows
|
|
|
|
|
|
# What a student has taken / teacher has taught
|
|
def user_enrolled_in(userid):
|
|
q = """SELECT u.canvasid as user_id, c.canvasid AS course_id, u.name, u.sortablename, c.code, c.name AS course_name, c.sis, t.name, p.current_login_at, p.current_login_ip, p.sis_user_id FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON e.user_id=u.id
|
|
JOIN pseudonym AS p ON p.user_id=u.id
|
|
JOIN terms AS t ON c.termid=t.id
|
|
WHERE u.canvasid=%s ORDER BY t.name ASC""" % userid
|
|
#AND e.workflow="active"
|
|
#GROUP BY u.canvasid""" ## AND e."type"="StudentEnrollment"
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
return cursor.fetchall()
|
|
|
|
|
|
# All students in this semester ...
|
|
def users_this_semester_db(sem=''):
|
|
if not sem:
|
|
sem = input("which semester? (ex: 202150) ")
|
|
|
|
q = """SELECT u.canvasid, u.name, u.sortablename, COUNT(e.id) AS num FROM enrollment AS e
|
|
JOIN users AS u ON e.user_id=u.id
|
|
JOIN courses AS c ON e.course_id=c.id
|
|
WHERE c.sis LIKE "%s-%%"
|
|
AND e.workflow="active"
|
|
GROUP BY u.canvasid""" % sem ## AND e."type"="StudentEnrollment"
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
all_u = set()
|
|
for u in cursor:
|
|
print(u)
|
|
all_u.add(str(u[0]))
|
|
print("%i users this semester." % len(all_u))
|
|
return all_u
|
|
|
|
|
|
# Everyone whose first semester is .....
|
|
def users_new_this_semester(sem=""):
|
|
if not len(sem):
|
|
sem = input("which semester? (ex: 202150) ")
|
|
users_to_enroll = set()
|
|
|
|
where1 = "c.sis LIKE '%s-%%'" % sem
|
|
where2 = "c.sis NOT LIKE '%s-%%'" % sem
|
|
|
|
|
|
|
|
q = """SELECT u.canvasid, u.name, u.sortablename, GROUP_CONCAT(c.code), COUNT(e.id) AS num FROM enrollment AS e
|
|
JOIN users AS u ON e.user_id=u.id
|
|
JOIN courses AS c ON e.course_id=c.id
|
|
WHERE %s
|
|
AND e.workflow="active"
|
|
AND e."type"="StudentEnrollment"
|
|
AND u.canvasid NOT IN (
|
|
SELECT u.canvasid FROM enrollment AS e
|
|
JOIN users AS u ON e.user_id=u.id
|
|
JOIN courses AS c ON e.course_id=c.id
|
|
WHERE %s
|
|
AND e.workflow="active"
|
|
AND e."type"="StudentEnrollment"
|
|
GROUP BY u.canvasid
|
|
)
|
|
GROUP BY u.canvasid
|
|
ORDER BY num DESC, u.sortablename""" % (where1,where2)
|
|
|
|
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
#s = cursor.fetchall()
|
|
#if s:
|
|
for u in cursor:
|
|
users_to_enroll.add(str(u[0]))
|
|
#print(s)
|
|
print("%i new users this semester." % len(users_to_enroll))
|
|
return users_to_enroll
|
|
|
|
|
|
# All student users in STEM - from local db
|
|
def user_in_stem():
|
|
enrolled = set()
|
|
q = """SELECT c.id, c.canvasid, c.name, c.code, c.start, c.visible, c.state,
|
|
u.id AS userid, u.canvasid AS user_c_id, u.sortablename FROM courses AS c
|
|
JOIN enrollment AS e ON c.id=e.course_id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.canvasid="11015" AND e."type"="StudentEnrollment"
|
|
AND e."workflow"='active'
|
|
ORDER BY c.code, u.sortablename """
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
results = cursor.fetchall()
|
|
for u in results:
|
|
enrolled.add( (u[9], u[8] ) )
|
|
return enrolled
|
|
|
|
|
|
|
|
# Get all the classes in one dept
|
|
def dept_classes(dept,sem=''):
|
|
if not sem:
|
|
sem = input("which semester? (ex: 202150) ")
|
|
|
|
|
|
q = """SELECT c.id, c.canvasid, c.name, c.code, c.start, c.visible, c.state,
|
|
u.id AS userid, u.canvasid AS user_c_id, u.sortablename FROM courses AS c
|
|
JOIN enrollment AS e ON c.id=e.course_id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
WHERE c.name LIKE """ + '"' + dept + """%" AND c.sis LIKE """ + '"' + sem + """%" AND e."type"="StudentEnrollment"
|
|
AND e."workflow"='active'
|
|
ORDER BY c.code, u.sortablename """
|
|
|
|
users = set()
|
|
(connection,cursor) = db()
|
|
|
|
cursor.execute(q)
|
|
results = cursor.fetchall()
|
|
for u in results:
|
|
users.add( (u[9], u[8]) )
|
|
return users
|
|
|
|
|
|
|
|
# TODO
|
|
#
|
|
# depts -> courses -> count students... 1 structure... display as 1 grid? treeview?
|
|
# afterwards: views by student / by week, row of grids per class...
|
|
|
|
def depts_with_classcounts(sem=''):
|
|
if not sem:
|
|
sem = input("which semester? (ex: 202150) ")
|
|
|
|
# This is messier cause i don't have depts in database
|
|
# should I add that? Or just use python. TODO
|
|
|
|
q = """select users.canvasid, courses.code, courses.id, users.name, roles.name as role,
|
|
enrollment.workflow as user_status, courses.state as course_state
|
|
from courses join terms on courses.termid = terms.id
|
|
join enrollment on enrollment.course_id=courses.id
|
|
join users on enrollment.user_id = users.id
|
|
join roles on roles.id=enrollment.role
|
|
where terms.sis='%s' and enrollment.workflow='active'
|
|
order by courses.code""" % sem
|
|
|
|
connection = sqlite3.connect(sqlite_file)
|
|
connection.row_factory = dict_factory
|
|
cursor = connection.cursor()
|
|
cursor.execute(q)
|
|
results = cursor.fetchall()
|
|
connection.close()
|
|
|
|
def f(x):
|
|
return x['code']
|
|
by_dept_ = funcy.group_by( guess_dept, results )
|
|
by_dept = {}
|
|
|
|
def name_with_count(name,li):
|
|
count = len(li)
|
|
return (name,count,li[0]['id'])
|
|
|
|
for d,li in by_dept_.items():
|
|
classes_in_dept = funcy.group_by( f, li )
|
|
#print(classes_in_dept)
|
|
by_dept[d] = [ name_with_count(c,v) for c,v in classes_in_dept.items() ]
|
|
|
|
|
|
|
|
return by_dept
|
|
|
|
def arrange_data_for_web(dept='', sem=''):
|
|
if not sem:
|
|
sem = input("which semester? (ex: 202150) ")
|
|
|
|
# I want:
|
|
# - structure of dicts, 1 file per class
|
|
# - class -> teacher [ teacher1_cid: {name:nnn, week1:hits,week2:hits...],
|
|
# student [stuent1_cid: {name:nnn, week1:hits,week2:hits...],
|
|
#
|
|
|
|
q = "select * from courses join terms on courses.termid = terms.id where terms.sis='%s' and courses.state='claimed'" % sem
|
|
|
|
# three... seconds:
|
|
|
|
q2 = """select courses.code, users.name, roles.name as role from courses join terms on courses.termid = terms.id
|
|
join enrollment on enrollment.course_id=courses.id
|
|
join users on enrollment.user_id = users.id
|
|
join roles on roles.id=enrollment.role
|
|
where terms.sis='%s' and courses.state='claimed'
|
|
order by code, role """ % sem
|
|
|
|
|
|
# courses with users - need it as hierarchy - by course, or by user... or 1 user... (with logs...?)
|
|
|
|
q3 = """select users.canvasid, courses.code, users.name, roles.name as role,
|
|
enrollment.workflow as user_status, courses.state as course_state
|
|
from courses
|
|
join terms on courses.termid = terms.id
|
|
join enrollment on enrollment.course_id=courses.id
|
|
join users on enrollment.user_id = users.id
|
|
join roles on roles.id=enrollment.role
|
|
where terms.sis='%s'
|
|
order by courses.code""" % sem
|
|
|
|
|
|
|
|
|
|
|
|
connection = sqlite3.connect(sqlite_file)
|
|
connection.row_factory = dict_factory
|
|
cursor = connection.cursor()
|
|
|
|
|
|
cursor.execute(q3)
|
|
|
|
# fetch all or one we'll go for all.
|
|
results = cursor.fetchall()
|
|
#print(results)
|
|
connection.close()
|
|
|
|
def f(x):
|
|
return x['code']
|
|
by_dept_ = funcy.group_by( guess_dept, results )
|
|
by_dept = {}
|
|
|
|
for d,li in by_dept_.items():
|
|
by_dept[d] = funcy.group_by( f, li )
|
|
#by_course = funcy.group_by( f, results )
|
|
#return by_course
|
|
#print(json.dumps(by_dept,indent=2))
|
|
if not dept:
|
|
return by_dept # list(by_dept.keys())
|
|
|
|
if dept in by_dept:
|
|
return by_dept[d]
|
|
return "Error"
|
|
|
|
#
|
|
#
|
|
#
|
|
#
|
|
#
|
|
#
|
|
#
|
|
# This csv loading code isn't really necessary cause i get it all from the canvas_data files.
|
|
# Except that the enrollments don't seem to be there so this works.
|
|
#
|
|
# Saved to mine in the future.....
|
|
|
|
# Get enrollments. (Best to freshly run pipelines/get_rosters) and put them into DB
|
|
def build_tables(headers,name):
|
|
first = 1
|
|
query = "CREATE TABLE IF NOT EXISTS %s (\n" % name
|
|
for L in headers:
|
|
if not first:
|
|
query += ",\n"
|
|
first = 0
|
|
query += "\t%s %s" % (L,"text")
|
|
return query + "\n);"
|
|
|
|
def load_tables(table,headers,row,verbose=0):
|
|
(conn,cur) = db()
|
|
vals = []
|
|
v_str = ''
|
|
i = 0
|
|
q = "INSERT INTO %s (" % table
|
|
for L in headers:
|
|
if i:
|
|
q += ","
|
|
v_str += ","
|
|
q += L
|
|
v_str += "?"
|
|
vals.append(str(row[i]))
|
|
i += 1
|
|
q += ") VALUES (" + v_str + ")"
|
|
try:
|
|
cur.execute(q,vals)
|
|
if verbose:
|
|
print(q)
|
|
print(vals)
|
|
except Exception as e:
|
|
print(e)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
def semester_enrollments(verbose=0):
|
|
def qstrip(txt): return txt.strip('"')
|
|
|
|
epath = "cache/rosters/enrollments-2020-08-02-19-49-36.csv"
|
|
#cpath = "cache/rosters/spring2020/courses.2020-02-25T15-57.csv"
|
|
#upath = "cache/rosters/spring2020/users.2020-02-25T15-57.csv"
|
|
|
|
enrollments = [ list( map( qstrip, L.strip().split(','))) for L in open(epath,'r').readlines() ]
|
|
#classes = [ list( map( qstrip, L.strip().split(','))) for L in open(cpath,'r').readlines() ]
|
|
#users = [ list( map( qstrip, L.strip().split(','))) for L in open(upath,'r').readlines() ]
|
|
|
|
e = build_tables(enrollments[0],"enrollments")
|
|
#c = build_tables(classes[0],"classes")
|
|
#u = build_tables(users[0],"users")
|
|
|
|
if verbose:
|
|
#for x in [e,c,u]: print(x)
|
|
print(enrollments[0])
|
|
print(enrollments[5])
|
|
#print(classes[0])
|
|
#print(classes[5])
|
|
#print(users[0])
|
|
#print(users[5])
|
|
|
|
(conn,cur) = db()
|
|
q = e
|
|
try:
|
|
cur.execute(q)
|
|
if verbose: print(q)
|
|
except Exception as ex:
|
|
print(ex)
|
|
print(q)
|
|
conn.commit()
|
|
|
|
headers = enrollments[0]
|
|
rows = enrollments[1:]
|
|
# Probably don't want to commit on every row?
|
|
for row in rows:
|
|
load_tables("enrollments",headers,row,verbose)
|
|
|
|
# Show this as a big grid? D3? CSV?
|
|
|
|
# Ultimately we need session calcs too. When we get 15 minute chunks, then just add them up....
|
|
|
|
# Overview of student hits in a course. Return a (pandas??) table student/timeblock/hits 6 * 7 * 7 items per student.
|
|
|
|
|
|
|
|
def comm_channel_file():
|
|
"""all = os.listdir(local_data_folder)
|
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
all.reverse()
|
|
#print "sorted file list:"
|
|
#print all
|
|
for F in all:
|
|
if re.search('communication_channel_dim',F):
|
|
user_file = F
|
|
break
|
|
print("most recent comm channel file is " + user_file)"""
|
|
|
|
|
|
user_file = most_recent_file_of('communication_channel_dim')
|
|
|
|
all_commchannels = []
|
|
for line in gzip.open(local_data_folder+user_file,'r'):
|
|
line_dict = dict(list(zip(communication_channel_dim_format, line.split("\t"))))
|
|
line_dict['globalid'] = line_dict['globalid'].rstrip()
|
|
all_commchannels.append(line_dict)
|
|
df = pd.DataFrame(all_commchannels)
|
|
return df
|
|
|
|
def pseudonym_file():
|
|
all = os.listdir(local_data_folder)
|
|
all.sort(key=lambda x: os.stat(os.path.join(local_data_folder,x)).st_mtime)
|
|
all.reverse()
|
|
#print "sorted file list:"
|
|
#print all
|
|
for F in all:
|
|
if re.search('pseudonym_dim',F):
|
|
p_file = F
|
|
break
|
|
print("most recent pseudonym file is " + p_file)
|
|
all_users = []
|
|
for line in gzip.open(local_data_folder + p_file,'r'):
|
|
line_dict = dict(list(zip(pseudonym_dim_format, line.split("\t"))))
|
|
line_dict['authentication_provider_id'] = line_dict['authentication_provider_id'].rstrip()
|
|
all_users.append(line_dict)
|
|
df = pd.DataFrame(all_users)
|
|
return df
|
|
|
|
|
|
def abcd():
|
|
setup_table('index')
|
|
|
|
|
|
def crns_to_teachers():
|
|
semester = '202070'
|
|
(connection,cursor) = db()
|
|
emails = set()
|
|
crns = codecs.open('cache/eval_teachers_2020fa.txt','r').readlines()
|
|
q = """SELECT c.id, c.canvasid AS course_cid, c.name, c.code, u.name, u.sortablename, u.canvasid AS user_cid, c.sis, h.address FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
JOIN comm_channel AS h ON u.id=h.user_id
|
|
WHERE h."type"="email"
|
|
AND c.sis = "%s-%s"
|
|
AND NOT c.state="deleted"
|
|
AND e."type"="TeacherEnrollment"
|
|
GROUP BY h.address;"""
|
|
for c in crns:
|
|
c = c.strip()
|
|
print(c)
|
|
cursor.execute(q % (semester,c))
|
|
r = cursor.fetchall()
|
|
for inst in r:
|
|
emails.add(inst[8])
|
|
print(inst)
|
|
open('cache/eval_emails.txt','w').write( ';'.join(emails))
|
|
return emails
|
|
|
|
|
|
|
|
def all_sem_courses_teachers():
|
|
SEM = "202430"
|
|
q = f"""SELECT c.id, c.canvasid AS course_cid, c.name, c.code, u.name, u.sortablename, u.canvasid AS user_cid, p.sis_user_id FROM courses AS c
|
|
JOIN enrollment AS e ON e.course_id=c.id
|
|
JOIN users AS u ON u.id=e.user_id
|
|
JOIN pseudonym AS p ON p.user_id=u.id
|
|
WHERE c.sis LIKE "{SEM}-%"
|
|
AND NOT c.state="deleted"
|
|
AND e."type"="TeacherEnrollment"
|
|
ORDER BY u.sortablename;"""
|
|
(connection,cursor) = db()
|
|
cursor.execute(q)
|
|
courses = cursor.fetchall()
|
|
#print(courses)
|
|
return courses
|
|
|
|
|
|
def to_sis_sem(s):
|
|
season = s[0:2]
|
|
year = "20" + s[2:5]
|
|
a = {'sp':'30','su':'50','fa':'70'}
|
|
season = a[season]
|
|
return year+season
|
|
|
|
def build_db_schedule():
|
|
# from the schedule json files
|
|
target = "\_sched\_expanded\.json"
|
|
def finder(st):
|
|
return re.search(target,st)
|
|
|
|
fields = 'sem,sem_sis,crn,dept,num,gp,dean,code,name,teacher,type,cap,act,loc,site,date,days,time,cred,ztc,partofday'.split(',')
|
|
fff = codecs.open('cache/schedule_db_version.sql', 'w', 'utf-8')
|
|
fff.write("CREATE TABLE IF NOT EXISTS schedule ( id text, sem text, sem_sis text, dept text, num text, gp text, dean text, code text, crn text, name text, teacher text,mode text, loc text, cap text, act text, site text, date text, cred text, ztc text, days text, time text, partofday text);\n")
|
|
all = os.listdir('cache/')
|
|
all = list(funcy.filter( finder, all ))
|
|
all.sort()
|
|
for F in all:
|
|
print("\n\n" + F)
|
|
sched = json.loads(codecs.open('cache/'+F,'r','utf-8').read())
|
|
for S in sched:
|
|
parts = S['code'].split(' ')
|
|
S['dept'] = parts[0]
|
|
S['num'] = parts[1]
|
|
S['gp'] = gp[parts[0]]
|
|
S['dean'] = dean[parts[0]]
|
|
S['sem'] = F[0:4]
|
|
S['sem_sis'] = to_sis_sem(F[0:4])
|
|
if not 'partofday' in S:
|
|
S['partofday'] = ''
|
|
str = "INSERT INTO schedule (sem,sem_sis,crn,dept,num,gp,dean,code,name,teacher,mode,cap,act,loc,site,date,days,time,cred,ztc,partofday) VALUES (%s);\n" % \
|
|
", ".join( [ "'" + re.sub(r"'", "", S[x]) + "'" for x in fields ] )
|
|
#print(str)
|
|
fff.write(str)
|
|
|
|
def process_enrollment_data():

    sem_index = {'201830':0, '201850':1, '201870':2, '201930':3, '201950':4, '201970':5, '202030':6, '202050':7, '202070':8, '202130':9, '202150':10, '202170':11, '202230':12, '202250':13, '202270':14, '202330':15}

    def sem_to_idx(s):
        return sem_index[str(s)]

    p = pd.read_csv('cache/20221207_all_enrollments_by_student.csv')
    p = p.fillna('')
    p['sem_idx'] = p['sem'].map(sem_to_idx)
    print(p)
    print(sorted(p['sem'].unique()))
    print(sorted(p['mode'].unique()))
    print(sorted(p['site'].unique()))
    print(sorted(p['partofday'].unique()))
    print(sorted(p['days'].unique()))
    print(sorted(p['dept'].unique()))
    print(sorted(p['num'].unique()))
    print(len(p['num'].unique()))

    print("I see this many student/semester rows: ", len(p))

    #q = p.groupby(["canvasid","sem"])

    q = p.groupby("canvasid")   # bare column name keeps `name` a scalar below
    print("I see this many students: ", len(q))
    #print(q.size())
    r = pd.DataFrame(q.size())
    print("Summary of course counts")
    print(r.iloc[:,0].value_counts())

    out = codecs.open('cache/20221207_all_enrollments_by_student_with_sems.csv','w','utf-8')
    out.write('"canvasid","sem","mode","site","partofday","days","dept","num","cred","sem_idx","local_sem_idx"\n')

    # Convert each student's absolute semester index into a per-student
    # sequence: their 1st, 2nd, 3rd semester, and so on.
    for name, group in q:
        # Drop students with only a single semester -- no predictive value here.
        if len(group['sem_idx'].unique()) < 2:
            continue
        group = group.copy()   # avoid pandas SettingWithCopy warnings
        mn = group['sem_idx'].min()
        group.loc[:, 'local_sem_idx'] = group['sem_idx'].map(lambda x: x - mn)
        out.write(group.to_csv(index=False, header=False))

    s = p.groupby(by="sem")
    #print("I see this many semesters: ", len(s))
    #print(s.size())


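# The absolute-to-local index step above is the core of the feature prep, so
# here is the same transform on a toy frame (made-up data, vectorized with
# groupby/transform instead of the explicit loop):

def _demo_local_sem_idx():
    toy = pd.DataFrame({'canvasid': [1, 1, 2, 2], 'sem_idx': [3, 5, 0, 1]})
    toy['local_sem_idx'] = toy.groupby('canvasid')['sem_idx'].transform(lambda s: s - s.min())
    print(toy)   # student 1's semesters 3,5 -> 0,2; student 2's 0,1 stay 0,1

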
# todo

def do_encoding():
    # One-hot encoding of each field.

    modes = {'hybrid':[0,0,0,1], 'in-person':[1,0,0,0], 'online':[0,1,0,0], 'online live':[0,0,1,0]}
    # 'Other' and 'TBA' share the all-zero vector.
    sites = {'Coyote Valley':[1,0,0,0,0,0], 'Gilroy':[0,1,0,0,0,0], 'Hollister':[0,0,1,0,0,0], 'Morgan Hill':[0,0,0,1,0,0], 'Online':[0,0,0,0,1,0], 'Other':[0,0,0,0,0,0], 'San Martin Airport':[0,0,0,0,0,1], 'TBA':[0,0,0,0,0,0]}
    times = {'':[0,0,0,0,0], 'Afternoon':[0,0,1,0,0], 'Evening':[0,0,0,1,0], 'Midday':[0,1,0,0,0], 'Morning':[1,0,0,0,0]}
    # Bit order: M,T,W,R,F,S ('U' and 'TBA' encode as all zeros).
    # 'MTWF' corrected to [1,1,1,0,1,0]; it previously duplicated 'MTRF'.
    days = {'':[0,0,0,0,0,0], 'F':[0,0,0,0,1,0], 'FS':[0,0,0,0,1,1], 'M':[1,0,0,0,0,0], 'MF':[1,0,0,0,1,0], 'MR':[1,0,0,1,0,0], 'MT':[1,1,0,0,0,0], 'MTR':[1,1,0,1,0,0], 'MTRF':[1,1,0,1,1,0], 'MTW':[1,1,1,0,0,0], 'MTWF':[1,1,1,0,1,0], 'MTWR':[1,1,1,1,0,0], 'MTWRF':[1,1,1,1,1,0], 'MW':[1,0,1,0,0,0], 'MWF':[1,0,1,0,1,0], 'MWR':[1,0,1,1,0,0], 'R':[0,0,0,1,0,0], 'RF':[0,0,0,1,1,0], 'S':[0,0,0,0,0,1], 'T':[0,1,0,0,0,0], 'TBA':[0,0,0,0,0,0], 'TF':[0,1,0,0,1,0], 'TR':[0,1,0,1,0,0], 'TRF':[0,1,0,1,1,0], 'TW':[0,1,1,0,0,0], 'TWR':[0,1,1,1,0,0], 'TWRF':[0,1,1,1,1,0], 'U':[0,0,0,0,0,0], 'W':[0,0,1,0,0,0], 'WF':[0,0,1,0,1,0], 'WR':[0,0,1,1,0,0]}

    deptslist = ['ACCT', 'AE', 'AH', 'AJ', 'AMT', 'ANTH', 'APE', 'ART', 'ASTR', 'ATH', 'BIO', 'BOT', 'BUS', 'CD', 'CHEM', 'CMGT', 'CMUN', 'COS', 'CSIS', 'CWE', 'DM', 'ECOL', 'ECON', 'ENGL', 'ENGR', 'ENVS', 'ESL', 'ETHN', 'FRNH', 'GEOG', 'GEOL', 'GUID', 'HE', 'HIST', 'HTM', 'HUM', 'HVAC', 'JFT', 'JLE', 'JOUR', 'JPN', 'KIN', 'LIB', 'LIFE', 'MATH', 'MCTV', 'MGMT', 'MUS', 'PHIL', 'PHYS', 'POLS', 'PSCI', 'PSYC', 'RE', 'SJS', 'SOC', 'SPAN', 'THEA', 'WELD', 'WTRM']
    d_len = len(deptslist)
    d_template = [ 0 for i in range(d_len) ]
    depts = {}
    for i in range(d_len):
        depts[ deptslist[i] ] = d_template.copy()
        depts[ deptslist[i] ][i] = 1

    numslist = ['1', '10', '100', '100A', '101', '102', '103', '104', '105', '107', '107A', '109', '10A', '10B', '11', '110', '111', '112', '113', '114', '118', '119', '11A', '11B', '11C', '12', '120', '121', '122', '124', '126', '128', '129', '12A', '12B', '12L', '13', '130', '131', '132', '133', '135', '13A', '13B', '13C', '13D', '14', '140', '142', '143', '144', '14A', '14B', '15', '150', '152', '154', '156', '157', '158', '15A', '15B', '16', '160', '162', '164', '166', '16A', '16B', '16C', '17', '171', '173', '175', '176', '178', '179', '17A', '17B', '17C', '18', '180', '181', '182', '183', '184', '186', '187', '189', '18A', '18B', '19', '190', '191A', '192', '19A', '19B', '19C', '1A', '1B', '1C', '1L', '2', '20', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '20A', '20B', '20C', '21', '210', '211', '212', '213', '213A', '214', '215', '216', '217', '218', '219', '22', '220', '221', '223', '225', '226', '227', '228', '229', '229A', '23', '230', '231', '231A', '232', '233', '235', '236', '24', '240', '242', '24A', '24B', '24C', '24D', '25', '250', '25A', '25B', '26', '260', '27', '270', '28', '280', '281', '282', '283', '290', '291A', '2A', '2B', '2C', '2F', '2J', '2L', '3', '301', '30A', '30B', '32', '33A', '33B', '33C', '34', '34A', '34B', '35', '36', '37', '38', '3A', '3B', '3C', '3D', '4', '40', '400', '402', '41', '411', '412', '412A', '412B', '413', '414', '415', '416', '42', '420', '43', '430', '44', '440', '44A', '44B', '44C', '45', '46', '47', '48', '4A', '4B', '4C', '5', '51', '52', '527', '528', '53', '530', '531', '534', '535', '536', '537', '538', '539', '54', '541', '542', '543', '547', '548', '549', '54L', '55', '550', '552', '553', '554', '557', '558A', '56', '560', '562', '563', '564', '565', '569', '570A', '570B', '571A', '571B', '571C', '575', '5A', '5B', '6', '60', '600', '601', '602', '603', '61A', '61B', '61C', '62A', '62B', '62C', '636', '638', '64A', '64B', '64C', '64D', '65', '66A', '66B', '66C', '68A', '68B', '68C', '7', '700', '701', '702A', '702B', '703', '704A', '705', '706', '707', '709', '70A', '71', '710', '71A', '71B', '727', '728', '73', '731', '732', '737', '738', '74', '740', '741', '742', '743', '744', '746', '747', '748', '749', '74A', '74B', '75', '752', '753', '754', '756', '76', '762', '763', '764', '77', '775', '776', '78', '784', '785', '786', '787', '788', '789', '79', '793', '7A', '7B', '7C', '8', '80', '81A', '81C', '83', '83A', '84', '85', '88A', '88B', '8A', '8B', '8C', '9', '90', '91A', '91B', '92', '97', '9A', '9B']
    n_len = len(numslist)
    n_template = [ 0 for i in range(n_len) ]
    nums = {}
    for i in range(n_len):
        nums[ numslist[i] ] = n_template.copy()
        nums[ numslist[i] ][i] = 1

    return [modes, sites, times, days, depts, nums]

    # Debug dump -- unreachable below the return; kept for reference.
    #for x in [modes, sites, times, days, depts, nums]:
    #    print('var')
    #    for k, v in x.items():
    #        print("\t", k, ":", v)


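# Sketch of turning one section row into a flat feature vector with the
# lookup tables above (the row values are illustrative, chosen from keys
# that exist in the tables):

def _demo_encode_row():
    modes, sites, times, days, depts, nums = do_encoding()
    row = {'mode': 'online', 'site': 'Gilroy', 'partofday': 'Morning',
           'days': 'TR', 'dept': 'CSIS', 'num': '10'}
    vec = (modes[row['mode']] + sites[row['site']] + times[row['partofday']]
           + days[row['days']] + depts[row['dept']] + nums[row['num']])
    print(len(vec), vec[:20])

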
def printer(x):
    print(x)


def all_students_history(handler=printer, limit=1000):
    qry = """SELECT
        u.name AS user_name,
        u.canvasid,
        e.workflow AS workflow,
        e.created,
        e.updated,
        c.name AS course_name,
        t.name AS term_name
    FROM
        users u
    JOIN
        enrollment e ON u.id = e.user_id
    JOIN
        courses c ON e.course_id = c.id
    JOIN
        terms t ON c.termid = t.id
    WHERE
        e.type = 'StudentEnrollment'
    ORDER BY
        u.sortablename, e.created ASC;"""

    connection = sqlite3.connect(sqlite_file)
    connection.row_factory = dict_factory
    cursor = connection.cursor()
    cursor.execute(qry)

    # Fetch the first row
    line = cursor.fetchone()
    i = 1

    # Process rows one by one
    while line is not None:
        # Process the current row
        handler(line)

        i += 1
        if i > limit: break

        # Fetch the next row
        line = cursor.fetchone()

    # Close the cursor and connection
    cursor.close()
    connection.close()


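# Usage sketch: rather than printing, a handler can accumulate rows -- here,
# collecting the dict rows into a list (function name is illustrative):

def _demo_collect_history(limit=100):
    rows = []
    all_students_history(handler=rows.append, limit=limit)
    print("collected %s enrollment rows" % len(rows))
    return rows

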
def add_sessions():
    j_in = json.loads(codecs.open('cache/2023sessions.json', 'r', 'utf-8').read())

    # Format the starttime; assumes a day string like "Thursday 24th" and a
    # time range like "9:00 - 10:30", and that every session is in August 2023.
    def format_starttime(day, time):
        day_parts = day.split()
        start_time = time.split('-')[0].strip()
        return f"2023-08-{day_parts[1][:2]} {start_time}:00"

    def esc(input_string):
        input_string = input_string.replace('\u00a0', ' ')   # non-breaking space -> space
        return input_string.replace("'", "''")

    def ugh(s):
        # Backtick-quote column names ('desc' is a SQL keyword).
        return f"`{s}`"

    # Sample values for track and location
    track = 1
    session_id = 1341

    f = "id, title, desc, type, length, starttime, track, location, location_irl, mode, gets_survey, is_flex_approved, parent"
    fields = ",".join([ugh(x) for x in f.split(', ')])

    for session_data in j_in:
        #print(json.dumps(session_data, indent=2))

        location = ""
        link = ""
        if "link" in session_data: link = session_data['link']

        mode = ""
        if "mode" in session_data: mode = session_data['mode']
        if mode == "Zoom": mode = "online"
        if mode == "Hybrid": mode = "hybrid"
        if mode == "Face-to-Face": mode = "inperson"

        # Generate INSERT statement
        insert_statement = f"""INSERT INTO conf_sessions
        ({fields})
        VALUES
        ({session_id}, '{esc(session_data["title"])}', '{esc(session_data["description"])}', 101, 1, '{format_starttime(session_data["day"], session_data["time"])}', {track}, '{location}', '{link}', '{mode}', 1, 1, 1320);
        """

        print(insert_statement)
        session_id += 1


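# Quick check of the starttime formatting in add_sessions, with made-up
# inputs of the assumed shape:
#
#   format_starttime("Thursday 24th", "9:00 - 10:30")  ->  "2023-08-24 9:00:00"

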
def test_long_running():
    from time import sleep
    print("Starting long process...")
    for i in range(20):
        print("sleeping %s" % i, flush=True)
        sleep(1)


def courses_to_sched():
    # Correlate rows in courses table with an id to rows in schedule table.
    conn, cur = db('cache/canvas_data/data20231012.db')
    q = "SELECT canvasid, code, sis, schedule FROM courses ORDER BY sis DESC"
    # Note: setting conn.row_factory here would not affect the already-created
    # cursor, so rows come back as tuples -- the c[0]/c[2] indexing below
    # relies on that.

    seasons = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}

    cur.execute(q)
    courses = cur.fetchall()
    sem = ''
    for c in courses:
        try:
            if re.search(r'^\d{6}-\d{5}$', c[2]):
                semparts = c[2].split('-')
                yr = semparts[0][2:4]
                if yr in ['16', '17']: continue
                print(c, end=' ')
                season = seasons[str(semparts[0][4:6])]
                sem = f"{season}{yr}"
                crn = semparts[1]
                print(sem, end=' ')
                q2 = "SELECT * FROM schedule WHERE crn=? AND sem=?"
                cur.execute(q2, (crn, sem))
                sched = cur.fetchall()
                if sched:
                    sched = sched[0]
                    sched_id = sched[0]
                    cur.execute("UPDATE courses SET schedule=? WHERE canvasid=?", (sched_id, c[0]))
                    print(sched)
                else:
                    print()
        except Exception as e:
            print(e)
    conn.commit()


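# The sis value parsed above looks like '202430-12345': a 6-digit SIS term
# followed by a 5-digit CRN. A tiny illustration of the split into the short
# semester code plus CRN (made-up value; function name is illustrative):

def _demo_sis_parse(sis='202430-12345'):
    seasons = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}
    term, crn = sis.split('-')
    sem = seasons[term[4:6]] + term[2:4]   # '202430' -> 'sp24'
    return sem, crn                        # ('sp24', '12345')

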
def query_multiple(q, database=sqlite_file):
    conn, cur = db(database)   # e.g. 'cache/canvas_data/data20231012.db'
    conn.row_factory = dict_factory
    # A fresh cursor is created *after* setting row_factory, so rows come
    # back as dicts; the cursor returned by db() is discarded.
    cur = conn.cursor()
    cur.execute(q)
    return cur.fetchall()


def query_execute(q, database=sqlite_file):
    conn, cur = db(database)
    cur.execute(q)
    conn.commit()


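# Usage sketch for query_multiple (table and column names come from the
# schemas defined in this file; the row slice is just for brevity):

def _demo_query_helpers():
    rows = query_multiple("SELECT sis, name FROM terms ORDER BY sis;")
    for r in rows[:5]:
        print(r['sis'], r['name'])   # dict rows, courtesy of dict_factory

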
def sched_to_db():
    import time   # for the commit timing below; not imported module-wide

    d = 'DROP TABLE IF EXISTS `schedule`;'
    table = '''CREATE TABLE `schedule` (
    `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    `crn` varchar(10) NOT NULL,
    `code` varchar(30) NOT NULL,
    `units` varchar(20) NOT NULL,
    `teacher` tinytext NOT NULL,
    `start` varchar(30) NOT NULL,
    `end` varchar(30) NOT NULL,
    `type` varchar(20) NOT NULL,
    `loc` varchar(80) NOT NULL,
    `site` varchar(50) NOT NULL,
    `partofday` varchar(40) NOT NULL,
    `cap` INTEGER,
    `act` INTEGER,
    `sem` varchar(10) NOT NULL
    ) ;
    '''

    #conn,cur = db('cache/canvas_data/data20231012.db')
    conn, cur = db()
    print(table)
    cur.execute(d)
    cur.execute(table)
    conn.commit()

    vals_cache = []
    last_time = time.process_time()
    i = 0

    # Defined before the loop so the final flush below can't hit an
    # undefined name if the very first fetch fails.
    query = "INSERT INTO schedule (crn, code, units, teacher, start, end, type, loc, site, partofday, cap, act, sem) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?);"

    output = codecs.open('cache/schedule.sql', 'w', 'utf-8')
    for year in ['16', '17', '18', '19', '20', '21', '22', '23', '24']:
        for sem in ['sp', 'su', 'fa']:
            term = f"{sem}{year}"
            print(term)
            try:
                sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
                show_summary = 1

                for c in sched:
                    pod = ''
                    if 'partofday' in c: pod = c['partofday']
                    q = [c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, c['cap'], c['act'], term]
                    vals_cache.append(q)   # [ str(x) for x in q ] )
                    #print(f"{i}: {q}")
                    i += 1
                    if i % 500 == 0:
                        conn.executemany(query, vals_cache)
                        conn.commit()
                        vals_cache = []
                        t = time.process_time()
                        delta = t - last_time
                        last_time = t
                        print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True)

            except Exception as e:
                print(e)
    # Flush any rows left over after the last full batch of 500.
    if vals_cache:
        conn.executemany(query, vals_cache)
        conn.commit()


def students_current_semester(sem='202370'):
    q = f"""SELECT u.canvasid FROM enrollment AS e
    JOIN users AS u ON e.user_id=u.id
    JOIN courses AS c ON e.course_id=c.id
    WHERE c.sis LIKE '{sem}-%'
    AND e.workflow='active'
    AND e."type"='StudentEnrollment'
    GROUP BY u.canvasid;"""
    result = query_multiple(q)
    #for r in result:
    #    print(json.dumps(result,indent=2))
    return result


def users_with_history():
    # Join each enrollment to its schedule row by pulling the CRN out of the
    # course sis value ('202430-12345' -> '12345') and matching the term.
    # Note: the schedule column names used here (semsis, units, type) must
    # match whichever loader built the table -- build_db_schedule writes
    # sem_sis/cred/mode, while sched_to_db writes sem/units/type.
    q = '''SELECT u.name, u.sortablename, u.canvasid, c.code, s.partofday, s.type, s.site, s.units, t.sis, s.sem FROM users u
    JOIN enrollment e ON u.id = e.user_id
    JOIN courses c ON c.id = e.course_id
    JOIN terms t ON c.termid = t.id
    JOIN schedule s ON (s.crn=SUBSTR(c.sis,INSTR(c.sis, '-')+1,5) AND s.semsis=t.sis)
    WHERE e.type='StudentEnrollment' AND e.workflow='active'
    ORDER BY u.sortablename, t.sis, c.code ;'''
    result = query_multiple(q)
    #for r in result:
    #    print(json.dumps(result,indent=2))
    return result


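# The SUBSTR/INSTR expression in the join above is the SQL equivalent of this
# Python one-liner (sis value is illustrative):

def _demo_crn_from_sis(sis='202430-12345'):
    # SQL: SUBSTR(c.sis, INSTR(c.sis, '-')+1, 5)
    return sis[sis.index('-') + 1:][:5]   # -> '12345'

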
def sem_schedule(sem):
    q = f"""SELECT * FROM schedule WHERE sem='{sem}' ORDER BY code,crn;"""
    return query_multiple(q)


def course_mode(crn, sem):
    q = f"""SELECT type FROM schedule WHERE crn='{crn}' AND sem='{sem}';"""
    #print(q)
    result = query_multiple(q)
    if result:
        return result[0]['type']
    return ''


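# Usage sketch for the two schedule helpers above (the semester value is an
# example and must exist in the loaded schedule table):

def _demo_schedule_lookups(sem='fa23'):
    for row in sem_schedule(sem)[:3]:
        print(row['code'], row['crn'], course_mode(row['crn'], sem))

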
if __name__ == "__main__":

    print('')
    options = {
        1: ['Read and join communications channels.', merge_comm_channel],
        2: ['Read and join users files.', merge_users],
        3: ['Read and join courses files.', merge_courses],
        4: ['Read and join enrollment files.', merge_enrollment],
        5: ['Read and join terms files.', merge_term],
        6: ['Read and join roles files.', merge_roles],
        7: ['Read and join conversation files', merge_convos],
        8: ['Read all courses', semester_enrollments],
        9: ['Load requests files. Merge into 15min blocks.', merge_requests],
        10: ['Full reload. Rename current db.', full_reload],
        11: ['test setup index', abcd],
        12: ['Test web version of data files (json)', make_views_summarys], #depts_with_classcounts], # arrange_data_for_web],
        13: ['Test web version of data files (?)', depts_with_classcounts],
        14: ['Student views, classes in 1 dept', dept_with_studentviews],
        15: ['AEC report positive attendance', aec_su20_report],
        16: ['Create list of all employees', all_gav_employees],
        17: ['List emails of evaluated instructors this semester', crns_to_teachers],
        18: ['Fetch this semester shells with teachers', all_sem_courses_teachers],
        19: ['Build DB schedule from json files', build_db_schedule],
        20: ['Process enrollment data', process_enrollment_data],
        21: ['Encode data', do_encoding],
        22: ['all students course history', all_students_history],
        23: ['test long running', test_long_running],
        24: ['add conference sessions', add_sessions],
        25: ['gavilan.cc extended schedule to sql insert format', sched_to_db],
        26: ['correlate courses to schedule id', courses_to_sched],
        27: ['report all users', users_with_history],
        #19: ['add evals for a whole semester', instructor_list_to_activate_evals],
        #16: ['Upload new employees to flex app', employees_refresh_flex],
    }

    # Anchored regex so stray input like '18x' falls through to the menu
    # instead of crashing int() below.
    if len(sys.argv) > 1 and re.search(r'^\d+$', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    # Call the function in the options dict
    options[int(resp)][1]()
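# Run non-interactively by passing the menu number on the command line, e.g.
# (module filename assumed for illustration):
#
#   python local_data.py 18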