from importlib import reload
import json, codecs, requests, re, pdb, csv, textdistance, collections
import sys, csv, string, funcy, math, shutil, os
import pytz, time
import pandas as pd
import matplotlib.pyplot as plt
#from pandas import TimeGrouper
from PIL import Image
from collections import defaultdict

from pipelines import fetch, fetch_stream, header, url, FetchError, put_file
from schedules import get_semester_schedule
from courses import course_enrollment, users_in_semester
from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock
from localcache import teachers_courses_semester, course_mode, sem_schedule
from localcache2 import all_2x_sem_courses_teachers, all_sem_courses_teachers
from schedules import campus_dept_hierarchy
#from pipelines import dean, dean_names #TODO
from util import dept_from_name, most_common_item
from os.path import exists, getmtime
from localcache2 import refresh_semester_schedule_db
from canvas_secrets import url

from dateutil import parser
from datetime import datetime as dt
from datetime import timedelta
import datetime
import queue
from threading import Thread
from os import path

# for NLP
#import spacy
from gensim import corpora, models, similarities, downloader, utils
from nltk import stem

# todo: these constants
#last_4_semesters = 'fall2020 summer2020 spring2020 fall2019'.split(' ')
#last_4_semesters_ids = [62, 60, 61, 25]
last_4_semesters = 'spring2021 fall2020 summer2020 spring2020'.split(' ')
last_4_semesters_ids = [168, 65, 64, 62]

log_default_startdate = "2021-08-23T00:00:00-07:00"
lds_stamp = parser.parse(log_default_startdate)
recvd_date = '2023-01-01T00:00:00Z'

num_threads = 25
max_log_count = 500000

##########
##########
##########   GETTING USER DATA
##########
##########

# All users to a cache file cache/allusers.json
def fetchAllUsers():
    if exists('cache/allusers.json'):
        time = date_time = dt.fromtimestamp( getmtime('cache/allusers.json') )
        newname = 'cache/allusers_' + time.strftime('%Y%m%d') + ".json"
        print("renaming old data file to %s" % newname)
        os.rename('cache/allusers.json', newname)
    out1 = codecs.open('cache/allusers.json','w','utf-8')
    out2 = codecs.open('cache/allusers_ids.json','w','utf-8')
    all_u = fetch_stream(url + '/api/v1/accounts/1/users?per_page=100', 1)
    ids = []
    main_list = []
    for this_fetch in all_u:
        for U in this_fetch:
            ids.append(U['id'])
            main_list.append(U)
    ids.sort()
    out2.write( json.dumps(ids, indent=2))
    out1.write( json.dumps(main_list, indent=2))
    out2.close()
    out1.close()
    return ids

##########
##########
##########   TEACHERS LIST AND LOCAL USERS FILE
##########
##########

# Fetch teacher users objects from local cache
def teacherRolesCache():
    # I used to be load_users
    users_raw = json.load(open('cache/ilearn_staff.json','r'))
    users = {}
    users_by_id = {}
    for U in users_raw:
        users[ U['login_id'] ] = U
        users_by_id[ U['id'] ] = U
    return users, users_by_id

# Outputs: cache/ilearn_staff.json
# Canvas: Fetch all people with gavilan.edu email address
def teacherRolesUpdateCache():
    # I used to be get_users
    t = fetch('/api/v1/accounts/1/users?per_page=500&search_term=%40gavilan.edu&include[]=email')
    g = open('cache/ilearn_staff.json','w')
    g.write( json.dumps(t) )
    g.close()
    #put_file('/gavilan.edu/staff/flex/2020/','cache/','ilearn_staff.json')
    print("Wrote to 'cache/ilearn_staff.json'")
    return teacherRolesCache()
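# Example (sketch) of how the two lookup dicts returned by teacherRolesCache()
# are typically used together. Assumes cache/ilearn_staff.json already exists
# (run teacherRolesUpdateCache() first); the sample login_id is made up.
def _example_teacher_lookup():
    users_by_login, users_by_id = teacherRolesCache()
    # look up by Canvas login (G number), then cross-check by numeric Canvas id
    rec = users_by_login.get('g00123456')   # hypothetical login_id
    same = users_by_id.get(rec['id']) if rec else None
    if rec:
        print(rec['name'], rec.get('email', ''))
    return rec, same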
# Fetch preferred email address for a given user id. ( Canvas )
def getEmail(user_id):
    results = fetch("/api/v1/users/" + str(user_id) + "/communication_channels")
    for r in results:
        if r['type']=='email':
            return r['address']
    return ''

##########
##########
##########   TEACHERS AND OTHER STAFF
##########
##########
#
# Gather all my info, CRM style, in the folder teacherdata
#
#
# Typical actions: For everyone with a teacher role:
#   - What are the courses they taught for the last X semesters?
#   - What's their activity level each semester?
#   - Which of those courses are Online, Hybrid or Face2face?
#     + column for each semester: OHLOHL
#   - How many online classes have they taught in the past?
#   - Are they brand new, or brand new online?
# further...
#   - what's their department?
#   - what's their badges and 'tech level?'
#   -

# All teachers in a particular course
def getAllTeachers(course_id=59):
    # returns a list of teacher user objects, paging through results
    # the same way fetchAllUsers() does
    qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher&per_page=100'
    teachers = []
    for page in fetch_stream(url + qry, 1):
        teachers.extend(page)
    return teachers

#
def classType(t):
    if t == 'lecture': return 'L'
    if t == 'online': return 'O'
    if t == 'hours': return 'R'
    if t == 'lab': return 'A'
    if t == 'hybrid': return 'H'
    # default when no type is present
    # todo: fix bug in schedule parser so non-online classes have a type field
    return 'L'

def my_blank_string():
    return "no data"

def my_blank_dict():
    return {'name':'NoName','email':'noemail@gavilan.edu'}

def my_empty_dict():
    return defaultdict(my_blank_string)

def get_email_from_rec(name,name_to_record):
    #print "Looking up: " + name
    try:
        return name_to_record[name]['email']
    except Exception as e:
        print("Missing Teacher %s" % name)
        return 'noemail@gavilan.edu'

# Pull the staff directory on the webpage. Convert to pandas dataframe
def staff_dir(get_fresh=False):
    """
    if get_fresh:
        url = "http://www.gavilan.edu/staff/dir.php"
        regex = "var\slist=(\[.*\]);"
        response = requests.get(url).text
        m = re.search(regex,response)
        if m:
            output = '{"staff":' + m.group(1) + '}'
            of = open('cache/teacherdata/staff_dir.json','w')
            of.write(output)
            js = json.loads(output)
            df = pd.DataFrame(js['staff'])
            return df
            print("Wrote cache/teacherdata/staff_dir.json")
        else:
            print("Failed on staff directory scrape")
            return ''
    else:
        input = json.loads(open('cache/teacherdata/staff_dir.json','r').read())
        df = pd.DataFrame(input['staff'])
        return df
    """
    # TODO lol get fresh again...
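    # Shape note (assumption, inferred from the column indexing below): each row
    # of cache/dir_corrections.csv appears to be keyed on the old department
    # string and to carry three replacement fields, roughly
    #
    #   old dept label , corrected dept , secondary dept , title
    #   "CSIS/Digital Media" , "CSIS" , "DM" , "Instructor"     (made-up example)
    #
    # dept1_crxn / dept2_crxn / title_crxn each map the old label to one of
    # those columns, and the non-empty pieces are joined with '/'.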
    old_dir = csv.reader(open('cache/personnel2020_04_12.csv'), delimiter=',')
    dept1_crxn = {r[0]:r[1] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    dept2_crxn = {r[0]:r[2] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    title_crxn = {r[0]:r[3] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    revised_dir = [ ]
    columns = next(old_dir)
    for r in old_dir:
        old_dept = r[2]
        if old_dept in dept1_crxn:
            new_one = dept1_crxn[old_dept]
            if dept2_crxn[old_dept]:
                new_one += '/' + dept2_crxn[old_dept]
            if title_crxn[old_dept]:
                new_one += '/' + title_crxn[old_dept]
            r[2] = new_one
        revised_dir.append(r)
    print(revised_dir)
    return pd.DataFrame(revised_dir,columns=columns)

#
#
#
#
###
###   TEACHER CRM FUNCTIONS
###
#

def schedForTeacherOverview(long,short):
    sem = get_semester_schedule(short)
    sem['type'] = sem['type'].apply(classType)
    #sem['code'] = sem[['code','type']].apply(' '.join,axis=1)
    sem['sem'] = short
    sem = sem.drop(['time','loc','name','date','days'],axis=1) # ,'crn'
    return sem

# Return a dataframe of the last 4 semester schedules put together
def oneYearSchedule():
    sp19 = schedForTeacherOverview('2019spring','sp19')
    su19 = schedForTeacherOverview('2019summer','su19')
    fa19 = schedForTeacherOverview('2019fall','fa19')
    sp20 = schedForTeacherOverview('2020spring','sp20')
    # The four-semester schedule
    a = pd.concat([sp19,su19,fa19,sp20], sort=True, ignore_index=True)
    a = a.drop(['cap','cmp','extra','rem','sec','cred','act'], axis=1)
    a.to_csv('cache/one_year_schedule.csv')
    return a

def num_sections_last_year(line):
    #if not type(line)=='str': return 0
    parts = line.split(' ')
    return len(parts)

def sec_type_stats(line):
    #print(type(line))
    #if not type(line)=='str': return {'fail':1}
    #print("in sts: " + str(line))
    parts = line.split(' ')
    output = defaultdict(int)
    for p in parts:
        output[p] += 1
    return output

def prct_online(line):
    d = sec_type_stats(line)
    #print(d)
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'O':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

def prct_lecture(line):
    #print(line)
    d = sec_type_stats(line)
    #if 'fail' in d: return 0
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'L':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

def prct_hybrid(line):
    d = sec_type_stats(line)
    #if 'fail' in d: return 0
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'H':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

# Given the names of teachers in last year's schedules, fill in email, etc.
from ilearn files def teacher_basic_info(sched, from_ilearn, names): bi = from_ilearn # pd.DataFrame(from_ilearn) bi.rename(columns={'id':'canvasid','login_id':'goo'}, inplace=True) # bi.drop(['name',],axis=1,inplace=True) #print(bi) #input('xx') sp20 = schedForTeacherOverview('2020spring','sp20') codes_sp20 = sp20.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) crns_sp20 = sp20.groupby('teacher')['crn'].apply( lambda x: ' '.join( map( str, funcy.distinct(x))) ) codes_sp20.rename(columns={'code':'sp20code'}, inplace=True) codes_sp20.to_csv('cache/trash/codes_sp20.csv',header=True) crns_sp20.rename(columns={'crn':'sp20crn'}, inplace=True) crns_sp20.to_csv('cache/trash/crns_sp20.csv',header=True) a = sched.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) a = pd.DataFrame(a) a.reset_index(inplace=True) a['dept'] = a.apply(guessDept,axis=1) print(a) def find_that_name(x): #print(x) if 'teacher' in x: return names(x['teacher']) #print('name not found?') return '' a['ilearn_name'] = a.apply( find_that_name, axis=1) a.rename(columns={'code':'courses'}, inplace=True) #print(type(a)) a.reset_index(inplace=True) a = pd.merge(a,codes_sp20.rename('sp20courses'), on='teacher') a = pd.merge(a,crns_sp20.rename('sp20crns'), on='teacher') a.to_csv('cache/trash/sched_w_sp20.csv',header=True) print(a) a['canvasid'] = a['teacher'].map(names) #print(a) c = pd.merge(bi, a, left_on='name', right_on='ilearn_name', how='outer') c.to_csv('cache/trash/basic.csv',header=True) #print(c) return c # TODO Old and broken # what percentage of their sections were online / hybrid /lecture ? # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/teacher_by_semester.csv, def teacherModalityHistory(sched=[],names=[]): if not len(sched): sched = oneYearSchedule() #names = match_username() # How many classes a teacher taught lect/online/hybrid/hours sec_type = sched.groupby(['teacher','sem'])['type'].apply(' '.join) sec_type.to_csv('cache/teacherdata/teacher_by_semester.csv',header=True) ## THIS IS THE LIST of how many ## lecture, hybrid, online they've taught #sec_type = pd.read_csv('cache/teacherdata/teacher_by_semester.csv') sec_grp = sec_type.groupby('teacher').aggregate( ' '.join ) #sec_grp.to_csv('cache/trash/sec_grp_3.csv',header=True) #sec_grp = sec_grp.iloc[1:] ## I'm seeing bad items on the first 2 #sec_grp.drop(index='teacher') #sec_grp.to_csv('cache/trash/sec_grp_0.csv',header=True) # sec_grp = pd.DataFrame(sec_grp) #print(type(sec_grp)) sec_grp['prct_online'] = sec_grp['type'].map(prct_online) sec_grp['prct_lecture'] = sec_grp['type'].map(prct_lecture) sec_grp['prct_hybrid'] = sec_grp['type'].map(prct_hybrid) sec_grp['num_sections_last_year'] = sec_grp['type'].map(num_sections_last_year) sec_grp.drop('type',axis=1,inplace=True) sec_grp.reset_index(inplace=True) sec_grp.to_csv('cache/teacherdata/modality_history.csv') return sec_grp def teacherCourseHistory(a,names): pass # actually not using this. 
moved to _basic_info # YEEEAH sched = a.groupby(['teacher','code']) #for name,group in sched: # print(name) #print(sched.count()) return a['name'] = a.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) a['email'] = a.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) a.sort_values(by=['dept','teacher','codenum'],inplace=True) a = a.drop(['teacher'],axis=1) a.to_csv('cache/teacherdata/courses_taught.csv') return a """ d = a.groupby(['teacher']) # ,'dept','codenum','codeletter' out1 = open('teacherdata/courses_taught.csv','w') by_dept = {} # x todo: sort by dept also for name, group in d: #print name if re.search(r'^\d+',name) or name=='TBA': print("Skipping weird name: ", name) continue rec = {'email':'xx'} try: rec = records_by_sname[name] #print rec except Exception as e: print("Missing Teacher %s" % name) continue out1.write(name+"\t"+rec['email']) s = set() #print group for idx,r in group.iterrows(): s.add( str(r[1]) + str(r[2]) + str(r[3])) for clas in sorted(s): d = dept_from_name(clas) if d in by_dept: if name in by_dept[d]: by_dept[d][name].append(clas) else: by_dept[d][name] = [ clas, ] else: by_dept[d] = { name: [ clas, ] } out1.write("\n\t"+str(clas)) out1.write("\n") out1.write( json.dumps(by_dept,indent=2))""" # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/course_teacher_combos.csv, def teacherSharedCourses(a=[]): if not len(a): a = oneYearSchedule() # List of classes. Group by teacher/format. Shows who has historically # taught a class and who teaches it most often. c = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' c = c.groupby(['dept','codenum','codeletter']) #,'teacher' c = c.aggregate(lambda x: set(x)) c.to_csv('teacherdata/course_teacher_combos.csv') ## THIS is the list of teachers who ## share courses return c # TODO: this is broken # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/num_courses_per_dept.csv (not teacher_course_oer_deptcount) # How many courses in each department were taught in the last year? 
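# Quick illustration (sketch) of the groupby / aggregate-to-set pattern that
# teacherSharedCourses() above relies on to show which teachers have
# historically shared a course. Toy data only; the column names mirror the
# schedule dataframe used in this module.
def _example_shared_courses():
    toy = pd.DataFrame({
        'dept':       ['CSIS','CSIS','MATH'],
        'codenum':    [85, 85, 233],
        'codeletter': ['', '', ''],
        'teacher':    ['Smith, A', 'Jones, B', 'Smith, A'],
    })
    grouped = toy.groupby(['dept','codenum','codeletter']).aggregate(lambda x: set(x))
    # CSIS 85 -> {'Smith, A', 'Jones, B'}   MATH 233 -> {'Smith, A'}
    return grouped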
def departmentCountCourses(a=[]): if not len(a): a = oneYearSchedule() tt = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' """records_by_sname = defaultdict(my_empty_dict, match_usernames()) tt.drop_duplicates(keep='first',inplace=True) tt['name'] = tt.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) tt['email'] = tt.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) tt = tt.drop(['teacher'],axis=1) tt.sort_values(by=['dept','name','codenum'],inplace=True) count = tt['dept'].value_counts() count.to_csv('cache/num_courses_per_dept.csv', header=True)""" def clean_nonprint(s): return re.sub(f'[^{re.escape(string.printable)}]', '', s) def read_cmte(names): output = [] out2 = defaultdict(list) input = codecs.open('cache/teacherdata/committees_2018_2019.csv','r','utf-8') with input as csvfile: cmtereader = csv.reader(csvfile, delimiter=',', quotechar='"') for row in cmtereader: for R in row: R = R.strip() R = clean_nonprint(R) (fname,lname,cmtes) = row a = re.split(",\s*",cmtes) if len(a)>1: cmtes = a else: cmtes = a name1 = lname + ", " + fname name2 = fname + " " + lname name = name1 realname = names(name1) if not realname: realname = names(name2) name = name2 if realname: for cmm in cmtes: output.append( [realname, cmm] ) out2[realname].append(cmm) else: print("committee participant name failed: %s / %s:\t%s" % (name1,name2,str(a))) print(type(name1)) #print(out2) return output,out2 def read_training_records(): myinput = open('cache/teacherdata/more_2018_2019_training_attendance.txt','r').readlines() current_sesh = "" ppl_in_sesh = {} all_ppl = set() for L in myinput: L = L.strip() if L: if L.startswith('#'): ma = re.search(r'^\#\s(.*)$',L) if ma: current_sesh = ma.group(1) else: print("-- read_training_records: Couldn't find training set? " + L) else: if current_sesh in ppl_in_sesh: ppl_in_sesh[current_sesh].append(L) else: ppl_in_sesh[current_sesh] = [ L, ] all_ppl.add(L) if 0: print(ppl_in_sesh) print(all_ppl) # Want to pivot the dict, so key is a name, value is another dict, where k2 is session name, v2 is Y/N d_of_d = defaultdict(dict) for k,v in ppl_in_sesh.items(): for user in v: d_of_d[user][k] = 'Y' return d_of_d # open a file and mark the people with their ids given. Return a dataframe def read_bootcamp1(filename): a = pd.read_csv(filename) #print(a) b = a.loc[:, ['canvas_id','grade','last_activity']] b.rename(columns={'canvas_id':'bc1canvasid','grade':'bootcamp_grade','last_activity':'bootcamp_date'}, inplace=True) #print(b) return b # open a file and mark the people with their ids given. Return a dataframe def read_bootcamp2(filename): a = pd.read_csv(filename) #print(a) b = a.loc[:, ['canvas_id','grade','last_activity']] b.rename(columns={'canvas_id':'bc2canvasid','grade':'bootcamp_progress','last_activity':'bootcamp_date'}, inplace=True) #print(b) return b def not_blank_or_pound(L): if L.startswith("#"): return False L = L.strip() if L == "": return False return True def temp1(x): #print(x[1]) return x[1] def add_realnames(df,names): # the surveys. 
raw name is in 2nd column df['ilearn_name'] = df.apply( lambda x: names(temp1(x),1), axis=1) return df def compareToughNames(a,b): # search for a in b m = re.search(a, b) if m: return True return False def compareNames(a,b,verbose=0): if a == b: return True cnDBG = 0 try: parts_a = [ W.lower() for W in re.split("[\s,]", a) ] [ x.strip() for x in parts_a ] parts_b = [ W.lower() for W in re.split("[\s,]", b) ] [ x.strip() for x in parts_b ] pa2 = sorted([ parts_a[0], parts_a[-1] ]) pb2 = sorted([ parts_b[0], parts_b[-1] ]) if pa2 == pb2: if cnDBG: print("->Match: %s, %s" % (a,b)) return True if pa2[0] == pb2[0] or pa2[-1] == pb2[-1]: if cnDBG: print("--->Near match: %s" % b) return False except Exception as e: #print("Problem with compareNames %s , %s" % (a,b)) #print(e) return False if len(pa2[0])>3 and len(pb2[0])>3: if pa2[0][0] == pb2[0][0]: if pa2[0][1] == pb2[0][1]: if pa2[0][2] == pb2[0][2]: if cnDBG: print("===> Near match (first 3): %s, %s, %s, %s" % (a, b, pa2[0], pb2[0])) pass b = b.lower() a = a.lower() #if verbose: print("searching: %s / %s" % (a,b)) if re.search( b, a): #print("REGEX MATCH: %s | %s" % (a,b)) return True if re.search( a, b): #print("REGEX MATCH: %s | %s" % (a,b)) return True return False def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0): # manual records are ('name':'canvas_id') #print(ilearn_records) if not othername: return "" if type(othername) == type(1.25): return "" #if math.isnan(othername): return False if othername in manual_records: a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[othername]) )) if a: return a['name'] for x in ilearn_records: #print('f_i_r') #print(othername) #print(x) if compareNames(othername,x['name'],verbose): return x['name'] for k,v in manual_records.items(): #print(k) #print(othername) #print(type(othername)) b = re.search( k, othername) if b: a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[k]) )) if a: return a['name'] return "" def manualNamesAndDept(): # copied from // getTeachersInfoMain .... schedule_one_yr = oneYearSchedule() from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) manual_names = manualNames() names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn, names_lookup) # till here # the staff directory dr = staff_dir(False) print(dr) print(dr.columns) print( dr['department'].unique() ) # now to reconcile and combine these.... # # we want: # - alternate names of academic / other depts, with one preferred # - some people are PT Fac, FT Fac, Director, assistant, spec, and some titles are unknown. # - sometimes the hierarchy is of departments, and sometimes of people. try not to confuse that. 
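# Usage sketch for the name-matching helpers above: find_ilearn_record() is
# normally curried with funcy.partial (exactly as getTeachersInfoMain() does
# below) so it can be passed around as a one-argument lookup. Filenames are
# the real ones used elsewhere in this module; the example name is made up.
def _example_names_lookup():
    from_ilearn = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
    manual_names = manualNames()
    names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names)
    # returns the canonical iLearn name, or "" when no match is found
    return names_lookup('Smith, Alice')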
# # eventually, want to get pics or other info from other sources too, o365, cranium cafe, etc # def manualNames(): mm = dict([ x.strip().split(',') for x in \ open('cache/teacherdata/teacher_manual_name_lookup.csv','r').readlines()]) mz = {} for k,v in mm.items(): mz[k] = v mz[k.lower()] = v parts = k.split(" ") if len(parts)==2: mz[ parts[1] + ", " + parts[0] ] = v mz[ parts[1] + "," + parts[0] ] = v #print(mz) return mz # given a list of class codes, return the most common (academic) department def guessDept(d_list): li = str(d_list.code).split(" ") count = defaultdict(int) #print(str(d_list.code)) for i in li: m = re.search(r'^([A-Z]+)$',i) if m: count[m.group(1)] += 1 mmax = 0 max_L = '' for k,v in count.items(): #print(" %s:%i, " % (k,v), end='') if v > mmax: mmax = v max_L = k print("") return max_L """ # Faculty Info Plans bootcamp_active.csv Started bootcamp. Remind them to finish it? bootcamp_passed.csv Badge'd for BC. Online and Hybrid teachers not on this list need reminding. courses_taught.csv x course_teacher_combos.csv Teachers who share the teaching of a course. Courses in common. emails_deans+chairs.txt Just a email list FA2017 Faculty Survey.csv Look at answers for video, helpful formats, and comments faculty_main_info.csv Has percentage mix of a teachers' online/hybrid/lecture history historical_shells_used.json x SP2019 Faculty Survey.csv Look at rate tech skills, topics interested in, would add video, and comments committees 2018 2019.csv Committees people serve on. Not so useful: teacher_by_semester.csv precursor to faculty_main_info. Has semesters separated. """ # # # # Call all the teacher info / CRM gathering stuff # Make one big csv file of everything I know about a teacher def getTeachersInfoMain(): schedule_one_yr = oneYearSchedule() #print(schedule_one_yr) #if input('q to quit ')=='q': return # comes from teacherRolesUpdateCache ... 
search for @gavilan.edu in email address from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) #names_from_ilearn = list( [x.lower() for x in map( str, sorted(list(funcy.pluck('name',from_ilearn)))) ] ) from_ilearn_df = pd.DataFrame(from_ilearn) manual_names = manualNames() names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) #print(from_ilearn_df) #if input('q to quit ')=='q': return #print(schedule_one_yr) #print("This is one year schedule.") #input('\npress enter to continue') teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn_df, names_lookup) #print(teacher_info) #input('\nThis is teacher info.\npress enter to continue') modality_history = teacherModalityHistory(schedule_one_yr,names_lookup) print(modality_history) #print("This is teacher modality history.") #input('\npress enter to continue') master = pd.merge( modality_history, teacher_info, on='teacher', how='outer') print(master) master.to_csv('cache/trash/joined1.csv') print(master.columns) #input('\nThis is Joined 1.\npress enter to continue') wp = read_bootcamp1('cache/teacherdata/bootcamp_passed.csv') #print(wp) master2 = pd.merge( master, wp, left_on='canvasid_x', right_on='bc1canvasid', how='outer') master2.to_csv('cache/trash/joined2.csv') print(master2) print(master2.columns) #input('\nThis is Joined 2.\npress enter to continue') wp = read_bootcamp2('cache/teacherdata/bootcamp_active.csv') master3 = pd.merge( master2, wp, left_on='canvasid_x', right_on='bc2canvasid', how='outer') master3.to_csv('cache/trash/joined3.csv') print(master3) print(master3.columns) #input('\nThis is Joined 3.\npress enter to continue') # THE VIEWS / HISTORY. UPDATE with get_recent_views() .... check it for appropriate dates.... 
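    # Shape note (assumption, based on what get_recent_views() writes):
    # activitysummary.json maps a canvas id to a 6-element list, roughly
    #   [canvas_id, name, total_hits, goo_number, [dates, per_day_counts], meta]
    # which is why the DataFrame below is built with orient='index' and the
    # six column names cid / cname / views / goo / dates / dateviews.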
views = json.loads( codecs.open('cache/teacherdata/activitysummary.json','r','utf-8').read() ) vdf = pd.DataFrame.from_dict(views,orient='index',columns=['cid','cname','views','goo','dates','dateviews']) print(vdf) #input('k') #master3.set_index('canvasid_x') master3 = pd.merge(master3, vdf, left_on='canvasid_x', right_on='cid',how='outer') dir_records = pd.DataFrame(staff_dir()) dir_records['email'] = dir_records['email'].str.lower() master3['email'] = master3['email'].str.lower() print(dir_records) master3 = pd.merge(master3, dir_records, on='email',how='outer') print(master3) #if input('q to quit ')=='q': return #master3.fillna(0, inplace=True) #master3['views'] = master3['views'].astype(int) #master3['num_sections_last_year'] = master3['num_sections_last_year'].astype(int) #cmte = pd.read_csv('cache/teacherdata/committees_2018_2019.csv') cmte,cmte_by_name = read_cmte(names_lookup) cmte_str_by_name = {} for k in cmte_by_name.keys(): #print(k) #print(cmte_by_name[k]) cmte_str_by_name[k] = ",".join(cmte_by_name[k]) cc = pd.DataFrame.from_dict(cmte_str_by_name,orient='index',columns=['committees']) # 'teacher', cc.reset_index(inplace=True) master4 = pd.merge(master3, cc, left_on='name', right_on='index', how='outer') master4.to_csv('cache/trash/joined4.csv') master4.drop(['teacher','ilearn_name','canvasid_y','bc1canvasid','bc2canvasid','cid','cname','index_y'],axis=1,inplace=True) # Exclude surveys for now """ survey_2017 = pd.read_csv('cache/teacherdata/FA2017 Faculty Survey.csv') survey_2017 = add_realnames(survey_2017,names_lookup) survey_2017.to_csv('cache/trash/survey1.csv') master5 = pd.merge(master4, survey_2017, left_on='name', right_on='ilearn_name', how='left') master5.to_csv('cache/trash/joined5.csv') survey_2019 = pd.read_csv('cache/teacherdata/SP2019 Faculty Survey.csv') survey_2019 = add_realnames(survey_2019,names_lookup) master6 = pd.merge(master5, survey_2019, left_on='name', right_on='ilearn_name', how='left') master6.to_csv('cache/trash/joined6.csv') newnames = [ x.strip() for x in open('cache/poll_question_names.txt','r').readlines() ] namedict = {} for i,n in enumerate(newnames): if i%3==1: newname = n if i%3==2: namedict[oldname] = newname if i%3==0: oldname = n master6 = master6.rename(columns=namedict) master6.to_csv('cache/teacherdata/staff_main_table.csv') master6.to_csv('cache/teacherdata/staff_main_table.csv') """ master4.to_csv('cache/teacherdata/staff_main_table.csv') master4.to_csv('gui/public/staff_main_table.csv') other_training_records = read_training_records() #print(json.dumps(other_training_records,indent=2)) #print("This is misc workshops.") tt = pd.DataFrame.from_dict(other_training_records,orient='index') tt = tt.fillna("") #print(tt) #input('\npress enter to continue') #teacherSharedCourses(schedule_one_yr) #getAllTeachersInTerm() # TODO - broken def enroll_staff_shell(): pass """staff = users_with_gavilan_email() for i,s in staff.iterrows(): print(s['canvasid'],s['name']) u = url + '/api/v1/courses/8528/enrollments' param = { 'enrollment[user_id]':s['canvasid'], 'enrollment[type]': 'StudentEnrollment', 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) """ #"Jun 28 2018 at 7:40AM" -> "%b %d %Y at %I:%M%p" #"September 18, 2017, 22:19:55" -> "%B %d, %Y, %H:%M:%S" #"Sun,05/12/99,12:30PM" -> "%a,%d/%m/%y,%I:%M%p" #"Mon, 21 March, 2015" -> "%a, %d %B, %Y" #"2018-03-12T10:12:45Z" -> "%Y-%m-%dT%H:%M:%SZ" # take a list of raw hits. 
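# Small sketch of the timestamp handling used by the activity functions that
# follow: Canvas page views carry ISO-8601 created_at strings (see the format
# notes above), which are parsed with dateutil and compared in UTC. The sample
# value here is made up.
def _example_parse_hit_timestamp():
    raw = "2018-03-12T10:12:45Z"                       # shape of h['created_at']
    stamp = parser.parse(raw)                          # tz-aware UTC datetime
    cutoff = dt.now().replace(tzinfo=pytz.timezone('UTC')) - datetime.timedelta(days=14)
    return stamp > cutoff                              # True when the hit is recent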
def activity_summary(hits): #infile = "cache/teacherdata/activity/G00101483.json" #data = json.loads(open(infile,'r').read()) #hits = data['raw'] if not hits: return [ [], [], ] dt_list = [] one_week = datetime.timedelta(days=14) # actually two.... today = dt.now().replace(tzinfo=pytz.timezone('UTC')) target = today - one_week for h in hits: the_stamp = parser.parse(h['created_at']) if the_stamp > target: dt_list.append(the_stamp) df = pd.DataFrame(dt_list, columns=['date',]) df.set_index('date', drop=False, inplace=True) df.rename(columns={'date':'hits'}, inplace=True) #df.resample('1D').count().plot(kind='bar') #return df.resample('1D').count().to_json(date_format='iso') #print(hits) #print(df) if not df.size: return [ [], [], ] bins = df.resample('1D').count().reset_index() bins['date'] = bins['date'].apply(str) #print(bins) return [bins['date'].to_list(), bins['hits'].to_list()] #plt.show() #df = df.groupby([df['date'].dt.to_period('D')]).count().unstack() #df.groupby(TimeGrouper(freq='10Min')).count().plot(kind='bar') #df.plot(kind='bar') # next step # 1. save timestamp of the fetch # # 2. parse it and only fetch since then. afterwards, pull out non-hits. Summarize day/week/month stats. # # 2a. merge old and new records, and re-summarize. # # 3. Next improvements in GUI. hook up to python server backend. # # Get views counts on current teachers. todo: month is hardcoded here def get_recent_views(id=1): dt_format = "%Y-%m-%dT%H:%M:%SZ" default_start_time = dt.strptime("2020-08-14T00:00:00Z", dt_format) default_start_time = default_start_time.replace(tzinfo=pytz.timezone('UTC')) end_time = dt.now(pytz.utc) print("End time is: %s" % str(end_time)) myheaders = "x,teacher,prct_online,prct_lecture,prct_hybrid,num_sections_last_year,canvasid_x,name,sortable_name,goo,email,index_x,courses,dept,ilearn_name_x,canvasid_y,canvasid_x,bootcamp_grade,bootcamp_date_x,canvasid_y,bootcamp_progress,bootcamp_date_y,index_y,committees".split(",") teachers = [row for row in csv.reader(open('cache/teacherdata/staff_main_table.csv','r'))][1:] #tt = teachers[6:10] summary = {} for t in teachers: name = t[1] if name=="" or name=="TBA": continue if not t[6]: continue the_id = int(float(t[6])) if the_id == 290: continue # STAFF STAFF goo = t[9] print(goo) # read log of this person: try: prev_logf = codecs.open('cache/teacherdata/activity/%s.json' % goo,'r','utf-8') prev_log = json.loads(prev_logf.read()) prev_logf.close() except: print("Exception happened on reading previous temp logs.") prev_log = '' if type(prev_log) == dict: lastfetch = dt.strptime(prev_log['meta']['lastfetch'], dt_format) lastfetch = lastfetch.replace(tzinfo=pytz.timezone('UTC')) print("last fetch is: " + str(lastfetch)) print("Hits BEFORE was: %i" % len(prev_log['raw'])) else: lastfetch = default_start_time prev_log = { "raw":[], } end_time = dt.now(pytz.utc) u = url + "/api/v1/users/%s/page_views?start_time=%s&end_time=%s&per_page=100" % (str(the_id),lastfetch.strftime(dt_format), end_time.strftime(dt_format)) #print(u) #input('getting this url') print(name + "\t",end='\n') if 1: # get fresh data? 
r = fetch(u) prev_log['raw'].extend( r ) summ = activity_summary(prev_log['raw']) mydata = {'meta':{'lastfetch':end_time.strftime(dt_format)},'summary':summ,'raw':prev_log['raw']} codecs.open('cache/teacherdata/activity/%s.json' % goo,'w','utf-8').write( json.dumps(mydata,indent=2)) summary[the_id] = [the_id, name, len(prev_log['raw']),goo, summ ,mydata['meta']] print("Hits AFTER is: %i" % len(prev_log['raw'])) codecs.open('cache/teacherdata/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) codecs.open('gui/public/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) # TODO broken? # Have they taught online or hybrid classes? """ def categorize_user(u): global role_table, term_courses their_courses = get_enrlmts_for_user(u, role_table) num_s = 0 num_t = 0 type = 's' online_only = 1 is_online = [] #print their_courses for x in their_courses.iterrows(): if len(x): ttype = x[1]['type'] if ttype=='StudentEnrollment': num_s += 1 if ttype=='TeacherEnrollment': num_t += 1 cid = x[1]['course_id'] current_term = term_courses[lambda x: x['id']==cid] if not current_term.empty: is_online.append(current_term['is_online'].values[0]) else: online_only = 0 else: online_only = 0 if num_t > num_s: type='t' if len(is_online)==0: online_only = 0 for i in is_online: if i==0: online_only = 0 #print "Type: " + type + " All online: " + str(online_only) + " Number courses this term: " + str(len(is_online)) return (u[0],type, online_only, len(is_online)) """ ########## ########## ########## PHOTOS ########## ########## # todo: threaded # Doest the account have a photo loaded? def checkForAvatar(id=2): try: t = url + '/api/v1/users/%s?include[]=last_login' % str(id) r2 = requests.get(t, headers = header) result = json.loads(r2.text) codecs.open('cache/users/%s.txt' % str(id),'w','utf-8').write( json.dumps(result,indent=2) ) if 'avatar_url' in result: if re.search(r'avatar\-50',result['avatar_url']): return 0 else: return (result['login_id'], result['avatar_url'], result['name']) except Exception as e: print("Looking for an avatar / profile pic had a problem: %s" % str(e)) return 0 # Grab em. Change the first if when continuing after problems.... def downloadPhoto(): import imghdr pix_dir = 'cache/picsCanvas2022/' # Update the list of all ilearn users? i_last_ix = '-1' photo_log_f = '' if 0: ## CHANGE TO 0 IF CRASHED / RESUMING.... 
ii = fetchAllUsers() photo_log_f = open("cache/fotolog.txt", "w") else: ii = json.loads(codecs.open('cache/allusers_ids.json','r').read()) photo_log_f = open("cache/fotolog.txt", "r+") i_last_ix = -1 try: ab = photo_log_f.read() print(ab) ac = ab.split("\n") print(ac) i_last_ix = ac[-2] print(i_last_ix) except: i_last_ix = -1 i_last_ix = int(i_last_ix) print("Last user index checked was: %s, which is id: %s" % \ (i_last_ix, ii[i_last_ix] )) print("Max index is: %i" % len(ii)) i_last_ix += 1 for index in range(i_last_ix, len(ii)): i = ii[index] photo_log_f.write("\n%i" % i ) a = checkForAvatar(i) if a: print(str(i) + ":\t" + str(a[0]) + "\t" + str(a[2]) ) try: r = requests.get(a[1], stream=True) if r.status_code == 200: r.raw.decode_content = True h=r.raw with open(pix_dir + a[0].lower(), 'wb') as f: shutil.copyfileobj(h, f) # rename to right file extension # TODO: Change imghdr to PILLOW (PIL) #img = Image.open(filename) #img_type = img.format # 'JPEG' img_type = imghdr.what(pix_dir + a[0].lower()) if img_type == 'jpeg': img_type = 'jpg' try: shutil.move(pix_dir + a[0].lower(),pix_dir + a[0].lower()+'.'+img_type) except Exception as e: print(" \tCouldn't rewrite file") else: print(str(i) + ":\t didn't get expected photo") except Exception as e: print(" \tProblem with download " + str(e)) else: print(str(i) + ":\tno user or no photo") pass def mergePhotoFolders(): staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] headers = staff[0] staff = staff[1:] activestaff = [] for i,h in enumerate(headers): #print("%i. %s" % (i,h) ) pass for S in staff: if S[7] and S[15]: # if teacher (name present) and sp20crns (taught in sp20) activestaff.append(S[9].lower()) activestaffset=set(activestaff) #return a = 'cache/picsCanvas' b = 'gui/public/picsCanvas2018' c = 'gui/public/picsCanvasAll' # I want a big list of who has an avatar pic. # and i want to know how many updated since last DL, and how many are in only one or the other. old = os.listdir(b) count = defaultdict(int) oldset = set() newset = set() for O in old: if O.endswith('.jpg') or O.endswith('.png'): g = O.split(r'.')[0] oldset.add(g) for N in os.listdir(a): if N.endswith('.jpg') or N.endswith('.png'): g = N.split(r'.')[0] newset.add(g) """print("Active SP20 Teachers") print(activestaffset) print("Old Avatars") print(oldset) print("New Avatars") print(newset)""" updated_set = oldset.union(newset) tch_set = updated_set.intersection(activestaffset) only_old = oldset.difference(newset) only_new = newset.difference(oldset) print("Tch: %i Old: %i New: %i" % (len(activestaffset),len(oldset),len(newset))) print("All avatars: %i Teachers: %i Only in old: %i Only in new: %i" % ( len(updated_set), len(tch_set), len(only_old), len(only_new))) allpics = os.listdir(c) haveapic = {} for A in allpics: if A.endswith('.jpg') or A.endswith('.png'): g = (A.split(r'.')[0]).upper() haveapic[g] = A outie = codecs.open('gui/public/pics.json','w').write( json.dumps( haveapic,indent=2)) def mergePhotoFolders2(): staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] headers = staff[0] staff = staff[1:] activestaff = [] for i,h in enumerate(headers): #print("%i. 
%s" % (i,h) ) pass for S in staff: if S[5]: activestaff.append(S[9].lower()) a = 'cache/picsCanvas' b = 'gui/public/picsCanvas2018' c = 'gui/public/picsCanvasAll' old = os.listdir(b) count = defaultdict(int) for N in os.listdir(a): if N.endswith('.jpg') or N.endswith('.png'): g = N.split(r'.')[0] if g in activestaff: count['s'] += 1 if N in old: #print( "Y - %s" % N) count['y'] += 1 else: #print( "N - %s" %N ) count['n'] += 1 else: #print("x - %s" % N) count['x'] += 1 print("Of the 2020 avatars, %i are in the 2018 folder, and %i are new." % (count['y'],count['n'])) print("Of %i active teachers, %i have avatars." % (len(activestaff),count['s'])) #print(json.dumps(count,indent=2)) # Go through my local profile pics, upload any that are missing. def uploadPhoto(): files = os.listdir('pics2017') #print json.dumps(files) pics_i_have = {} #goo = "g00188606" canvas_users = json.loads(open('canvas/users.json','r').read()) t = url + '/api/v1/users/self/files' i = 0 j = 0 pics_dir = 'pics2017/' for x in canvas_users: j += 1 if x['login_id'].lower() + '.jpg' in files: #print x['login_id'] + " " + x['name'] i += 1 pics_i_have[x['id']] = x print('Canvas users: ' + str(j)) print('Pic matches: ' + str(i)) account_count = 0 ids_i_uploaded = [] for id, target in list(pics_i_have.items()): #if account_count > 50: # print 'Stopping after 5.' # break print('trying ' + target['name'] + '(' + str(id) + ')') if checkForAvatar(id): print("Seems to have avatar loaded.") continue goo = target['login_id'].lower() local_img = pics_dir + goo + '.jpg' inform_parameters = { 'name':goo + '.jpg', 'size':os.path.getsize(local_img), # read the filesize 'content_type':'image/jpeg', 'parent_folder_path':'profile pictures', 'as_user_id':'{0}'.format(id) } res = requests.post(t, headers = header, data=inform_parameters) print("Done prepping Canvas for upload, now sending the data...") json_res = json.loads(res.text,object_pairs_hook=collections.OrderedDict) files = {'file':open(local_img,'rb').read()} _data = list(json_res.items()) _data[1] = ('upload_params',list(_data[1][1].items())) print("Yes! Done sending pre-emptive 'here comes data' data, now uploading the file...") upload_file_response = requests.post(json_res['upload_url'],data=_data[1][1],files=files,allow_redirects=False) # Step 3: Confirm upload print("Done uploading the file, now confirming the upload...") confirmation = requests.post(upload_file_response.headers['location'],headers=header) if 'id' in confirmation.json(): file_id = confirmation.json()['id'] else: print('no id here') #print(confirmation.json()) print("upload confirmed...nicely done!") time.sleep(1) # Make api call to set avatar image to the token of the uploaded imaged (file_id) params = { 'as_user_id':'{0}'.format(id)} avatar_options = requests.get(url + "/api/v1/users/%s/avatars"% '{0}'.format(id),headers=header,params=params) #print "\nAvatar options: " #print avatar_options.json() for ao in avatar_options.json(): #print ao.keys() if ao.get('display_name')==goo + '.jpg': #print("avatar option found...") #print((ao.get('display_name'),ao.get('token'), ao.get('url'))) params['user[avatar][token]'] = ao.get('token') set_avatar_user = requests.put(url + "/api/v1/users/%s"% '{0}'.format(id),headers=header,params=params) if set_avatar_user.status_code == 200: print(('success uploading user avatar for {0}'.format(id))) account_count += 1 ids_i_uploaded.append(id) else: print('some problem setting avatar') else: pass #print 'didnt get right display name?' 
print("Uploaded these guys: " + json.dumps(ids_i_uploaded)) ########## ########## ########## EMAILING PEOPLE ########## ########## #def test_email(): # send_z_email("Peter Howell", "Peter", "phowell@gavilan.edu", ['CSIS85','CSIS42']) def create_ztc_list(): course_combos = pd.read_csv('cache/teacher_course_oer_email_list.csv') course_combos.fillna('',inplace=True) # read this file and make it a dict (in one line!) dept_counts = { x[0]:x[1].strip() for x in [ y.split(',') for y in open('cache/teacher_course_oer_deptcount.csv','r').readlines() ][1:] } course_template = "%s    " url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s" # % (FULLNAME, COURSE1) # list depts mydepts = sorted(list(set(course_combos['dept'] ))) i = 0 outp = open("output/oer_email_list.csv","w") outp.write("fullname,firstname,email,link,courses\n") ones_i_did = [ int(x) for x in "40 38 31 21 7 12 24 25 1 13 18 22 44 55 56 51 20 16 2 3 4 5 6 8 9 10 11 14 15 17 23 53 52 50 30 48 39 37 54 49 47 46 45 43 42 41 33 32 29 28 27 26".split(" ") ] for D in mydepts: i += 1 extra = '' if D in dept_counts: extra = " (%s)" % dept_counts[D] extra2 = '' if i in ones_i_did: extra2 = "xxxx " print("%s %i. %s %s" % (extra2,i,D,extra)) choice_list = input("Which department? (for multiple, separate with spaces) ").split(' ') all_people_df = [] for choice in choice_list: is_cs = course_combos['dept']==mydepts[int(choice)-1] filtered = pd.DataFrame(course_combos[is_cs]) if len(all_people_df): all_people_df = pd.concat([filtered,all_people_df]) else: all_people_df = filtered print(mydepts[int(choice)-1]) print(all_people_df) print(' ') all_people_df.sort_values(by=['name'],inplace=True) print(all_people_df) b = all_people_df.groupby(['name']) for name,group in b: if name == 'no data': continue nameparts = name.split(', ') fullname = nameparts[1] + ' ' + nameparts[0] firstname = nameparts[1] outp.write(fullname + ',' + firstname + ',') email = '' link = '' courses = [] flag = 1 for i in group.iterrows(): g = i[1] # wtf is this shi..... this_course = g.dept + ' ' + str(g.codenum) + g.codeletter courses.append( this_course ) #print(g) email = g.email if flag: link = url_template % (fullname, this_course) flag = 0 outp.write(email + ',' + link + "," + " ".join(courses) + "\n") outp.close() ########## ########## ########## FORENSICS TYPE STUFF ########## ########## # better name for this standard fetch. so they stay together in alpha order too.... def get_user_info(id): u = fetch( '/api/v1/users/%i' % id ) ff = codecs.open('cache/users/%i.txt' % id, 'w', 'utf-8') ff.write( json.dumps(u, indent=2)) return u # these are any messages that get pushed out to their email def comm_mssgs_for_user(uid=0): if not uid: uid = input('Canvas id of the user? ') u = url + '/api/v1/comm_messages?user_id=%s&start_time=%s&end_time=%s' % (uid,'2021-01-01T01:01:01Z','2021-08-01T01:01:01Z') # &filter[]=user_%s' % uid convos = fetch(u,1) oo = codecs.open('cache/comms_push_user_%s.txt' % str(uid), 'w') oo.write('USER %s\n' % uid) oo.write(json.dumps(convos, indent=2)) print(convos) # def convos_for_user(uid=0): if not uid: uid = input('Canvas id of the user? 
') u = url + '/api/v1/conversations?include_all_conversation_ids=true&as_user_id=%s' % uid # &filter[]=user_%s' % uid convos = fetch(u,1) oo = codecs.open('cache/convo_user_%s.txt' % str(uid), 'w') oo.write('USER %s\n' % uid) oo.write(json.dumps(convos, indent=2)) convo_ids_list = convos["conversation_ids"] print(convo_ids_list) u2 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=archived&as_user_id=%s' % uid # &filter[]=user_%s' % uid archived_convos = fetch(u2,1) try: aconvo_ids_list = archived_convos["conversations_ids"] print(aconvo_ids_list) except: print("didnt seem to be any archived.") aconvo_ids_list = [] u3 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=sent&as_user_id=%s' % uid # &filter[]=user_%s' % uid sent_convos = fetch(u3,1) try: sconvo_ids_list = sent_convos["conversations_ids"] print(sconvo_ids_list) except: print("didnt seem to be any sent.") sconvo_ids_list = [] convo_ids_list.extend(aconvo_ids_list) convo_ids_list.extend(sconvo_ids_list) ## ## Now get all the messages in each of these conversations ## for cid in convo_ids_list: print("Fetching conversation id: %s" % cid) oo.write("\n\n----------------\nconversation id: %s\n\n" % cid) u4 = url + '/api/v1/conversations/%s?as_user_id=%s' % (cid,uid) # ' % (cid, uid coverstn = fetch(u4,1) oo.write("\n%s\n\n" % json.dumps(coverstn,indent=2)) """ for c in convos: c['participants'] = ", ".join([ x['name'] for x in c['participants'] ]) includes = tuple("last_message subject last_message_at participants".split(" ")) convos = list( \ reversed([ funcy.project(x, includes) for x in convos ])) """ # #print(json.dumps(convos, indent=2)) # single q sub def quiz_get_sub(courseid, quizid, subid=0): u = url + "/api/v1/courses/%s/quizzes/%s/submissions/%s" % ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/quizzes/%s/questions?quiz_submission_id=%s" % \ ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/assignments/%s/submissions/%s?include[]=submission_history" % \ ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/students/submissions?student_ids[]=all&include=submission_history&grouped=true&workflow_state=submitted" % str(courseid) return fetch(u) #?quiz_submission_id=%s" # quiz submissions for quiz id x, in course id y def quiz_submissions(courseid=9768, quizid=32580): #subs = quiz_get_sub(courseid, quizid) #print( json.dumps( subs, indent=2 ) ) if 1: # POST data = { "quiz_report[includes_all_versions]": "true", "quiz_report[report_type]": "student_analysis" } u = url + "/api/v1/courses/%s/quizzes/%s/reports?" 
% ( str(courseid), str(quizid) ) res = requests.post(u, headers = header, data=data) print(res.content) #u2 = url + "/api/v1/courses/%s/quizzes/%s/reports" % ( str(courseid), str(quizid) ) #res2 = fetch(u2) #print( json.dumps(res2.content, indent=2)) jres2 = json.loads( res.content ) print(jres2) if jres2['file'] and jres2['file']['url']: u3 = jres2['file']['url'] r = requests.get(u3, headers=header, allow_redirects=True) open('cache/quizreport.txt', 'wb').write(r.content) return for R in res2: if R['id'] == 7124: u3 = R['url'] r = requests.get(u3, headers=header, allow_redirects=True) open('cache/quizreport.txt', 'wb').write(r.content) return u3 = url + "/api/v1/courses/%s/quizzes/%s/reports/%s" % ( str(courseid), str(quizid), res2[''] ) oo = codecs.open('cache/submissions.json','w', 'utf-8') oo.write('[\n') for s in subs: if len(s['submissions']): j = json.dumps(s, indent=2) print(j) oo.write(j) oo.write('\n') oo.write('\n]\n') return 0 #u = url + "/api/v1/courses/%s/quizzes/%s/submissions?include[]=submission" % (str(courseid), str(quizid)) u = url + "/api/v1/courses/%s/quizzes/%s/submissions" % (str(courseid), str(quizid)) subs = fetch(u, 0) print( json.dumps( subs, indent=1 ) ) for S in subs['quiz_submissions']: print(json.dumps(S)) submis = quiz_get_sub(courseid, quizid, S['id']) print(json.dumps(submis, indent=2)) # return (timeblock, course, read=0,write=1) def requests_line(line,i=0): try: L = line # strip? if type(L) == type(b'abc'): L = line.decode('utf-8') for pattern in unwanted_req_paths: if pattern in L: return 0 i = 0 line_parts = list(csv.reader( [L] ))[0] #for p in line_parts: # print("%i\t%s" % (i, p)) # i += 1 d = parser.parse(line_parts[7]) d = d.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) d = timeblock_24hr_from_dt(d) #r = re.search('context\'\:\s(\d+)', line_parts[22]) #c = 0 #if r: # c = r.groups(1) str1 = line_parts[20] str2 = str1.replace("'",'"') str2 = str2.replace("None",'""') #print(str2) j = json.loads(str2 ) c = j['context'] a = line_parts[5] #print( str( (d, c, a) )) return (d, str(c), a) except Exception as e: #print("Exception: " + str(e)) return 0 # def report_logs(id=0): if not id: L = ['10531', ] else: L = [ id, ] report = [] for id in L: emt_by_id = course_enrollment(id) for U in emt_by_id.values(): user_d = defaultdict( int ) print( "Lookin at user: %s" % U['user']['name'] ) report.append( "User: %s\n" % U['user']['name'] ) log_file_name = 'cache/users/logs/%i.csv' % U['user']['id'] if path.exists(log_file_name): print("Log file %s exists" % log_file_name) temp = open(log_file_name, 'r').readlines() for T in temp[1:]: #print(T) result = requests_line(T) if result: (d, c, a) = result if c == id: user_d[d] += 1 print(json.dumps(user_d, indent=2)) for V in sorted(user_d.keys()): report.append( "\t%s: %i\n" % ( dt_from_24hr_timeblock(V), user_d[V]) ) report.append("\n\n") return report def track_users_in_sem(): L = users_this_semester_db() sL = list(L) sL.sort(reverse=True) fetch_queue = queue.Queue() for i in range(num_threads): worker = Thread(target=track_user_q, args=(i,fetch_queue)) worker.setDaemon(True) worker.start() for U in sL: print( "adding %s to the queue" % U ) fetch_queue.put( U ) fetch_queue.join() print("Done.") def track_users_in_class(L=[]): if len(L)==0: #id = '10531' ids = input("Course ids, separated with comma: ") L = [x for x in ids.split(',')] print("Getting users in: " + str(L)) fetch_queue = queue.Queue() for i in range(num_threads): worker = Thread(target=track_user_q, args=(i,fetch_queue)) 
worker.setDaemon(True) worker.start() users_set = set() for id in L: emt_by_id = course_enrollment(id) print(emt_by_id) for U in emt_by_id.values(): if not U['user_id'] in users_set: print(U) print( "adding %s to the queue" % U['user']['name'] ) fetch_queue.put( U['user_id'] ) users_set.add(U['user_id']) all_reports = [] fetch_queue.join() print("Done with %i users in these courses." % len(users_set)) for id in L: rpt = report_logs(id) all_reports.append(rpt) outp = codecs.open('cache/courses/report_%s.txt' % id, 'w', 'utf-8') outp.write(''.join(rpt)) outp.close() return all_reports def track_user_q(id, q): while True: user = q.get() print("Thread %i: Going to download user %s" % (id, str(user))) try: track_user(user, id) except FetchError as e: pass q.task_done() def count_courses(csv_file="cache/users/logs/45268.csv"): course_counts = defaultdict(int) course_regex = r'courses/(\d+)/' with open(csv_file, mode='r') as file: reader = csv.reader(file) for row in reader: match = re.search(course_regex, row[1]) if match: course_id = match.group(1) course_counts[course_id] += 1 for course_id, count in course_counts.items(): print(f"Course ID {course_id}: {count} occurrences") # honestly it doesn't make much sense to get full histories this way if they're # already in the canvas data tables.... # just the most recent hits or a short period # # Live data would be better. # Maintain local logs. Look to see if we have some, download logs since then for a user. def track_user(id=0,qid=0): global recvd_date L = [id,] if not id: ids = input("User ids (1 or more separated by comma): ") L = [int(x) for x in ids.split(',')] print("Getting users: " + json.dumps(L)) for id in L: id = int(id) # Open info file if it exists, check for last day retrived try: infofile = open("cache/users/%i.txt" % id, 'r') info = json.loads( infofile.read() ) # TODO: set up this info file if it isn't there. check any changes too. it # was written where?.... 
infofile.close() except Exception as e: print("failed to open info file for user id %i" % id) info = get_user_info(id) print("(%i) Student %i Info: " % (qid,id)) #print( json.dumps(info, indent=2)) url_addition = "" if 1: # hard code dates start_date = "2021-01-01T00:00:00-07:00" end_date = dt.now().strftime("%Y-%m-%dT%H:%M:%S-07:00") # end_date = "2026-07-01T00:00:00-07:00" url_addition = f"?start_time={start_date}&end_time={end_date}" elif 'last_days_log' in info: print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id'])) print("Last day logged was: %s" % info['last_days_log']) url_addition = "?start_time=%s" % info['last_days_log'] the_stamp = parser.parse(info['last_days_log']) the_stamp = the_stamp.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) now = dt.now() now = now.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) dif = now - the_stamp print("It was %s ago" % str(dif)) if the_stamp < lds_stamp: print("Too long, taking default") url_addition = "?start_time=%s" % log_default_startdate #lds_stamp = parser.parse(log_default_startdate) ########## else: url_addition = "?start_time=%s" % log_default_startdate #if dif.days > 1: url = "/api/v1/users/%i/page_views%s" % (id, url_addition) print(url) try: api_gen = fetch_stream(url,0) log_file_name = 'cache/users/logs/%i.csv' % id if path.exists(log_file_name): print("Log file %s exists" % log_file_name) temp = open(log_file_name, 'a', newline='') csv_writer = csv.writer(temp) else: print("Creating new log file: %s" % log_file_name) temp = open(log_file_name, 'w', newline='') ### TODO csv_writer = csv.writer(temp) count = 0 for result in api_gen: if count == 0 and len(result): header = result[0].keys() csv_writer.writerow(header) # results come in newest first.... recvd_date = result[0]['updated_at'] print("(%i) Most recent hit is %s" % (qid,recvd_date)) count += len(result) indent = " " * qid #print("(%i) Got %i records, %i so far" % (qid,len(result),count)) print("(%s - %i) %s %i" % (qid, id, indent, count)) if count > max_log_count: print("Too many logs, bailing. 
sorry.") break for R in result: csv_writer.writerow(R.values()) latest = parser.parse(recvd_date) #last_full_day = (latest - timedelta(days=1)).isoformat() info['last_days_log'] = recvd_date #last_full_day infofile = open("cache/users/%i.txt" % id, 'w') infofile.write(json.dumps( info, indent=2 )) infofile.close() print("(%i) Output to 'cache/users/log/%i.csv'" % (qid,id)) except FetchError as e: print("Getting a 502 error.") raise FetchError() except Exception as e2: print("Got an error receiving logs: %s" % str(e2)) # def track_users_by_teacherclass(): all_teachers = teachers_courses_semester() skip_to = "Punit Kamrah" skipping = 1 grouped = funcy.group_by( lambda x: x[4], all_teachers ) g2 = {} for k,v in grouped.items(): print(k) if skipping and skip_to != k: print("skipping") continue skipping = 0 g2[k] = list(funcy.distinct( v, 1 )) print("\n\n\n\n\n") print(k) print("\n\n\n\n\n") teacherfile = codecs.open('cache/teacherdata/reports/%s.txt' % k.replace(" ","_"),'w','utf-8') class_ids = funcy.lpluck(1,v) class_names = funcy.lpluck(2,v) print(class_ids) print(class_names) rpts = track_users_in_class(class_ids) for i, R in enumerate(rpts): teacherfile.write('\n\n\n---\n\n%s \n\n' % class_names[i]) teacherfile.write(''.join(R)) teacherfile.flush() teacherfile.close() print(json.dumps(g2, indent=2)) def section_enroll(): user = input("user id> ") sect = input("section id> ") u = f"{url}/api/v1/sections/{sect}/enrollments" param = { 'enrollment[user_id]':user, 'enrollment[type]': 'StudentEnrollment', 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) def one_course_enrol(): users = input("user,ids,separated,by,commas> ").split(',') course = input("course id> ") my_types = {1:'StudentEnrollment', 2:'TeacherEnrollment'} print(json.dumps(my_types,indent=2)) the_type = my_types[ int(input("enrollment type> ")) ] # the_type = 'StudentEnrollment' # the_type = 'TeacherEnrollment' u = f"{url}/api/v1/courses/{course}/enrollments" for user in users: param = { 'enrollment[user_id]':user, 'enrollment[type]': the_type, 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) def find_new_teachers(): filename = "cache/fa22_sched.json" jj = json.loads(codecs.open(filename,'r','utf-8').read()) for J in jj: print( J['teacher']) import traceback def find_no_goo(): DO_DELETE_USERS = 0 DO_DELETE_PORTFOLIOS = 0 output = codecs.open('cache/no_goo_numbers.json','w','utf-8') output2 = codecs.open('cache/wrong_root_acct.json','w','utf-8') output3 = codecs.open('cache/wrong_sis_import_id.json','w','utf-8') output4 = codecs.open('cache/bad_portfolios.json','w','utf-8') #output5 = codecs.open('cache/bad_portfolios_detail.html','w','utf-8') all = [] no_root = [] no_sis = [] port = [] i = 0 j = 0 k = 0 p = 0 users = json.loads(codecs.open('cache/allusers.json','r','utf-8').read()) for u in users: if not 'login_id' in u: print(u['name']) i+=1 all.append(u) user_port = [] pp = fetch(url + '/api/v1/users/%s/eportfolios' % str(u['id'])) for p_user in pp: try: user_port.append( fetch(url+'/api/v1/eportfolios/%s' % str(p_user['id']) ) ) if DO_DELETE_PORTFOLIOS: #output5.write("
deleted: %s\n" % (str(p_user['id']),str(p_user['id'])) ) #output5.flush() del_request = requests.delete(url + "/api/v1/eportfolios/%s" % str(p_user['id']) ,headers=header) print(del_request.text) except Exception as e: traceback.print_exc() p += len(pp) port.append(pp) if DO_DELETE_USERS: print("Deleting %s..." % u['name']) del_request = requests.delete(url + "/api/v1/accounts/1/users/%s" % str(u['id']) ,headers=header) print(del_request.text) if 'root_account' in u and u['root_account'] != "ilearn.gavilan.edu": no_root.append(u) j += 1 if 'sis_import_id' in u and not u['sis_import_id']: no_sis.append(u) k += 1 print("Found %i users without G numbers" % i) print("Found %i users with non gav root account" % j) print("Found %i users without sis id" % k) print("Found %i questionable portfolios" % p) output.write( json.dumps(all,indent=2) ) output2.write( json.dumps(no_root,indent=2) ) output3.write( json.dumps(no_sis,indent=2) ) output4.write( json.dumps(port,indent=2) ) def track_a_user(): a = input("User ID? ") track_user(a) def compare_db_tables(): import requests # Download JSON files url_a = 'http://www.gavilan.edu/staff/tlc/db.php?a=dir' url_b = 'http://www.gavilan.edu/staff/tlc/db.php?a=confusers' response_a = requests.get(url_a) print('got 1') response_b = requests.get(url_b) print('got 2') # Parse JSON data data_a = response_a.json() data_b = response_b.json() by_email_conf = {} for item in data_a: by_email_conf[item['email']] = item # Extract email addresses from each file emails_a = {item['email'] for item in data_a} emails_b = {item['email'] for item in data_b} emails_a = {item for item in emails_a if item is not None} emails_b = {item for item in emails_b if item is not None} emails_a = {item.lower() for item in emails_a} emails_b = {item.lower() for item in emails_b} # Find common emails common_emails = emails_a.intersection(emails_b) # Find distinct emails for each file distinct_emails_a = emails_a.difference(emails_b) distinct_emails_b = emails_b.difference(emails_a) # Print the results print("Common Emails:") for email in sorted(list(common_emails)): print(email) print("\nDistinct Emails in Staff directory:") for email in sorted(list(distinct_emails_a)): print(email) print("\nDistinct Emails in conf users table:") for email in sorted(list(distinct_emails_b)): print(email) out = codecs.open('cache/users_fix.sql','w','utf-8') for e in common_emails: out.write(f"update `conf_users` set `p2id`='{by_email_conf[e]['id']}' where lower(`email`)='{e}';\n") def training_find_goos(): from openpyxl import Workbook, load_workbook from openpyxl.chart import BarChart, Series, Reference from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font, Fill wb = load_workbook("C:/Users/peter/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx") print(wb.sheetnames) all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) records = {} sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed','BOOT CAMP','TITLE V GOTT ACADEMY', 'Other Certifications'] for sname in sheets: print(f"{sname}:") sheet = wb[sname] records[sname] = {} for row in sheet.iter_rows(): try: name = row[0].value if row[0].value == 'G Number': continue lastname = name.split(' ')[-1].lower() goo = row[1].value if not goo: print(f" Missing ID: {row[0].value}") for t in all_teachers: if re.search(lastname, t['name'].lower()): print(f" {t['name']}: {t['sis_user_id']}") except Exception as e: pass print() # Cross-reference training records with upcoming sections 
to flag missing GOTT work. def cross_ref_training(): from semesters import find_term from openpyxl import Workbook, load_workbook from openpyxl.chart import BarChart, Series, Reference from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font, Fill wb = load_workbook("C:/Users/phowell/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx") print(wb.sheetnames) term = find_term("sp26") # Fetch from Canvas DB. Make sure its recently updated. # Also relies on schedule being in database. Run localcache2.courses_to_sched() # OR localcache2.refresh_semester_schedule_db() #courses = all_2x_sem_courses_teachers('202550', '202570') # courses = all_sem_courses_teachers(term['bannercode']) # report for email report = codecs.open('cache/gott_report.txt','w','utf-8') # update local list of teachers from ilearn? RELOAD_TEACHERS = 0 ask = input('download new list of teachers? (y/n) ') if ask.strip()=='y': RELOAD_TEACHERS = 1 RELOAD_SCHEDULE = 0 ask2 = input('refresh schedule db? (y/n) ') if ask2.strip()=='y': RELOAD_SCHEDULE = 1 if RELOAD_SCHEDULE: refresh_semester_schedule_db(term['code']) if RELOAD_TEACHERS: teacherRolesUpdateCache() # TODO inefficient but just read it again all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = campus_dept_hierarchy() records = {} sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed', 'SU21 Workshop', 'BOOT CAMP', 'GOTT ABC', 'TITLE V GOTT ACADEMY', 'Other Certifications'] for sname in sheets: print(f"{sname}:") sheet = wb[sname] records[sname] = {} for row in sheet.iter_rows(): if row[1].value == 'ID': continue goo = row[1].value rowvals = [str(v.value) for v in row] records[sname][goo] = rowvals print(" " + " ".join(rowvals)) print() #print(json.dumps(records,indent=2)) teachers = defaultdict(list) teachers_bydept = defaultdict(set) alldepts = set() # reconfigure to key on goo by_goo = defaultdict(dict) for course,coursedict in records.items(): print(course) for goo,record in coursedict.items(): if goo=='ID': continue if record[0]=='Name': continue print(f" {goo}") try: if len(record)<3: by_goo[goo][course] = "ok" elif record[4]=="None": by_goo[goo][course] = "ok" else: by_goo[goo][course] = record[4] # record the end date except: print(f" -- problem with this record {json.dumps(record)}") by_goo[goo][course] = 'ok' bg_file = codecs.open('cache/gott_by_goo.json','w','utf-8') bg_file.write(json.dumps(by_goo,indent=2)) ## ## Start looking at the courses to cross reference ## for c in courses: print(c) try: goo = c[6] ## is this changing? 
c[8] crn = c[8] name = c[1] # full course name teacher = c[4] # last, first ctype = c[7] dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1) alldepts.add(dept1) d = list(c) #d.append(ctype) if not ctype: print(f"not finding mode for {name}") continue if ctype=='in-person': continue teachers[teacher].append(d) teachers_bydept[dept1].add(teacher) except: print(f"Problem with {c}, skipping") alldepts = list(alldepts) alldepts.sort() sheet = wb.create_sheet("New Summary") r = 1 deptfont = Font(bold=True) flagfont = PatternFill("solid", fgColor="00FFFFCC") for thedean in ['et','nl','vc','jn', 'de']: sheet.cell(row=r, column=1).value = dean_code_to_name[thedean] sheet.cell(row=r, column=1).font = deptfont r += 2 report.write(f"Dean: {dean_code_to_name[thedean]}\n") for D in alldepts: if not D in course_to_dean: print(f"MISSING DEAN for dept: {D}") if course_to_dean[D] == thedean: if len(teachers_bydept[D]) == 0: continue print(f"\n------------\n{D}") sheet.cell(row=r, column=1).value = D sheet.cell(row=r, column=1).font = deptfont r += 1 for t in teachers_bydept[D]: if t == 'STAFF, STAFF': continue waived = False sects = teachers[t] print(f"Sections for {t}: {sects}") goo = sects[0][6] course_mode = sects[0][7] print(t) sheet.cell(row=r, column=1).value = f"{t}" sheet.cell(row=r, column=2).value = f"{goo}" teacher_row = r r += 1 has_gott1 = goo in records['GOTT1'] has_gott2 = goo in records['GOTT2'] if has_gott1: sheet.cell(row=r, column=2).value = f"✓ GOTT 1 Trained" print(f" GOTT 1") r += 1 if goo in records['Other Certifications']: sheet.cell(row=r, column=2).value = f"✓ GOTT Waived - Outside Training" print(f" GOTT outside training") waived = True r += 1 if has_gott2: sheet.cell(row=r, column=2).value = f"✓ GOTT 2 Trained" print(f" GOTT 2") r += 1 if goo in records['POCR Reviewed']: sheet.cell(row=r, column=2).value = f"✓ POCR Reviewed" print(f" POCR") waived = True r += 1 if goo in records['TITLE V GOTT ACADEMY']: sheet.cell(row=r, column=2).value = f"✓ TITLE V GOTT ACADEMY 2014" print(f" GOTT Academy") waived = True r += 1 missing_requirements = [] if (not has_gott1) and (not waived): missing_requirements.append("GOTT 1") if (not has_gott2) and (not waived): missing_requirements.append("GOTT 2") if missing_requirements: sheet.cell(row=teacher_row, column=1).fill = flagfont for requirement in missing_requirements: sheet.cell(row=r, column=2).value = f"- MISSING {requirement}" report.write(f"\tMISSING {requirement}: {t} {goo}\n") r += 1 if goo in records['GOTT4']: sheet.cell(row=r, column=2).value = f"✓ GOTT 4 Trained" print(f" GOTT 4") r += 1 if goo in records['GOTT5']: sheet.cell(row=r, column=2).value = f"✓ GOTT 5 Trained" print(f" GOTT 5") r += 1 if goo in records['GOTT6']: sheet.cell(row=r, column=2).value = f"✓ GOTT 6 Trained" print(f" GOTT ") r += 1 if goo in records['SU21 Workshop']: sheet.cell(row=r, column=2).value = f"✓ SU21 Workshop" print(f" summer 21 workshop") r += 1 if goo in records['HUM.STEM']: sheet.cell(row=r, column=2).value = f"✓ Humanizing Stem" print(f" humanizing stem") r += 1 if goo in records['BOOT CAMP']: sheet.cell(row=r, column=2).value = f"✓ Boot Camp Self Paced" print(f" bc self paced") r += 1 if goo in records['GOTT ABC']: sheet.cell(row=r, column=2).value = f"✓ {records['GOTT ABC'][goo][2]} Self Paced" print(f" GOTT abc self paced") r += 1 for s in sects: sheet.cell(row=r, column=2).value = f"{s[3]}" sheet.cell(row=r, column=3).value = f"{s[1]}" r += 1 if missing_requirements: report.write(f"\t\t{s[3]}\t{s[1]}\t{s[7]}\n") if missing_requirements: 
report.write(f"\n") #for c in sheet.columns: # print(c) # print(f"{c} width: {sheet.column_dimensions[c].value}") sheet.column_dimensions['A'].width = 20 sheet.column_dimensions['B'].width = 30 sheet.column_dimensions['C'].width = 75 formatted_date = dt.now().strftime('%Y%m%d') wb.save(f"C:/Users/phowell/Downloads/GOTT_Completion_masterlist_{formatted_date}_summarized.xlsx") def cross_ref_training_withcsv(): from localcache2 import all_sem_courses_teachers from localcache import course_mode from localcache import sem_schedule gott1 = {} g1read = csv.reader(codecs.open('cache/GOTT_1.csv','r','utf-8')) i = 0 for row in g1read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott1[goo] = row gott2 = {} g2read = csv.reader(codecs.open('cache/GOTT_2.csv','r','utf-8')) i = 0 for row in g2read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott2[goo] = row gott4 = {} g4read = csv.reader(codecs.open('cache/GOTT_4.csv','r','utf-8')) i = 0 for row in g4read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott4[goo] = row gott5 = {} g5read = csv.reader(codecs.open('cache/GOTT_5.csv','r','utf-8')) i = 0 for row in g5read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott5[goo] = row gott6 = {} g6read = csv.reader(codecs.open('cache/GOTT_6.csv','r','utf-8')) i = 0 for row in g6read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott6[goo] = row gott9 = {} g9read = csv.reader(codecs.open('cache/GOTT_others.csv','r','utf-8')) i = 0 for row in g9read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott9[goo] = row #report = codecs.open('cache/training_report.csv','w','utf-8') #out = csv.writer(report) #out.writerow(['crn','course','mode','teacher','goo','training','training_date']) #sections = sem_schedule('sp24') teachers = defaultdict(list) teachers_bydept = defaultdict(set) alldepts = set() courses = all_sem_courses_teachers() for c in courses: goo = c[7] crn = c[2].split(' ')[-1].split('/')[0] name = c[2] teacher = c[4] ctype = course_mode(crn,'sp24') dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1) alldepts.add(dept1) d = list(c) d.append(ctype) if ctype=='in-person': continue teachers[teacher].append(d) teachers_bydept[dept1].add(teacher) #print(f"{crn} {ctype} {name} ") #if goo in gott1: # out.writerow([c[1], c[2], ctype, c[4], goo, "GOTT 1", "/".join(gott1[goo][2:])]) # #else: # out.writerow([c[1], c[2], ctype, c[4], goo, "GOTT 1 MISSING", '']) alldepts = list(alldepts) alldepts.sort() for D in alldepts: print(f"\n------------\n{D}") for t in teachers_bydept[D]: if t == 'STAFF STAFF': continue sects = teachers[t] print(t) goo = sects[0][7] if goo in gott1: print(" + GOTT 1 Trained") else: print(" - MISSING GOTT 1") if goo in gott2: print(" + GOTT 2 Trained") if goo in gott4: print(" + GOTT 4 Trained") if goo in gott5: print(" + GOTT 5 Trained") if goo in gott6: print(" + GOTT 6 Trained") if goo in gott9: print(" + GOTT Waived - Outside Training") for s in sects: print(f" {s[8]} {s[2]}") print() def get_portfolios(id=0): if not id: id = int( input( "what user id? ")) p = fetch( f"{url}/api/v1/users/{id}/eportfolios" ) print(json.dumps(p, indent=2)) def get_port_pages(id=0): if not id: id = int( input("what portfolio id? 
")) p = fetch(f"{url}/api/v1/eportfolios/{id}/pages") print(json.dumps(p, indent=2)) def set_email_skip_confirm(): user_id = '76741' old_email = 'scalhoun@gavilan.edu' new_email = 'scalhoun@hartnell.edu' create_url = url + f'/api/v1/users/{user_id}/communication_channels' print(create_url) list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) for ch in list_channels: id = ch['id'] result = requests.delete(create_url+f"/{id}", headers=header) print(f"deleting id {id}") print(result.content) print('after deleting:') list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) print('\n\n') body = { 'communication_channel[address]' : old_email, 'communication_channel[type]' : 'email', 'skip_confirmation': True } response = requests.post(create_url, headers=header, data = body) print(response.json()) #confirm that the channel was created body = { 'communication_channel[address]' : new_email, 'communication_channel[type]' : 'email', 'skip_confirmation': True } response = requests.post(create_url, headers=header, data = body) print(response.json()) #confirm that the channel was created print('after creating:') list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) print('\n\n') from html.parser import HTMLParser class HTMLStripper(HTMLParser): def __init__(self): super().__init__() self.reset() self.fed = [] def handle_data(self, d): self.fed.append(d) def get_text(self): return ' '.join(self.fed) def strip_html_and_truncate(html, length=25): if not html: return "" stripper = HTMLStripper() stripper.feed(html) text = stripper.get_text() text = ' '.join(text.split()) # collapse all whitespace text = re.sub(r'\n',' ', text) return text[:length] def summarize_submissions(submissions): summary = [] for sub in submissions: assignment = sub.get("assignment", {}) summary.append({ "submission": { "id": sub.get("id"), "excerpt": strip_html_and_truncate(sub.get("body", "")), "grade": sub.get("grade"), "submitted_at": sub.get("submitted_at"), "workflow_state": sub.get("workflow_state"), "missing": sub.get("missing", False), "late": sub.get("late", False) }, "assignment": { "id": assignment.get("id"), "name": assignment.get("name"), "excerpt": strip_html_and_truncate(assignment.get("description", "")), "due_at": assignment.get("due_at"), "is_quiz": assignment.get("is_quiz_assignment", False), "points_possible": assignment.get("points_possible") } }) return summary import pytz def format_assignments_results_table(results): def safe(val): return str(val) if val is not None else "-" def clip(text,length=40): return (text[:length] + "...") if text and len(text) > length+3 else (text or "") def to_pacific(iso): if not iso: return "-" utc = dt.strptime(iso, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc) pacific = utc.astimezone(pytz.timezone("US/Pacific")) return pacific.strftime("%Y-%m-%d %I:%M%p") # Sort by assignment due date (missing dates go last) def get_due_at(item): dt = item["assignment"].get("due_at") return dt.max if not dt else dt.strptime(dt, "%Y-%m-%dT%H:%M:%SZ") results = sorted(results, key=get_due_at) header = ( "| Type | Subm/Assmt ID | Due Date (PT) | Submitted (PT) | Grade/Points | Assignment Excerpt | Submission Excerpt |" ) sep = ( "|------------|---------------------|--------------------|----------------------|----------------------|-------------------------------|-------------------------------|" ) rows = [] for item in results: a = item["assignment"] s = item["submission"] kind = "quiz" if a.get("is_quiz") else "assignment" 
id_combo = f"{safe(s['id'])}/{safe(a['id'])}" due_pt = to_pacific(a.get("due_at")) submitted_pt = to_pacific(s.get("submitted_at")) grade = safe(s.get("grade")) points = safe(a.get("points_possible")) flags = [] if s.get("late"): flags.append("late") if s.get("missing"): flags.append("missing") gradepoints = f"{grade}/{points}" + (" " + ",".join(flags) if flags else "") row = ( f"| {kind:<10} | {id_combo:<19} | {due_pt:<18} | {submitted_pt:<20} | {gradepoints:<20} | {clip(a.get('name'),20) + ' - ' + clip(a.get('excerpt')):<49} | {clip(s.get('excerpt')):<29} |" ) rows.append(row) return '\n'.join([header, sep] + rows) def user_course_enrollment(user_id, course_id): user_url = f"{url}/api/v1/courses/{course_id}/enrollments" myparams = {"user_id": user_id, "type[]": "StudentEnrollment", "state[]": ['active','invited','deleted','rejected','completed','inactive']} return fetch(user_url, params=myparams) def get_student_course_assignments(student_id, course_id): submission_params = { "student_ids[]": f"{student_id}", "include[]": ["assignment"] } submissions_url = f"{url}/api/v1/courses/{course_id}/students/submissions" submissions = fetch(submissions_url, params=submission_params) summary = summarize_submissions(submissions) fmt = format_assignments_results_table(summary) return fmt def testme(): course_id = 22054 student_id = 63638 x = get_student_course_assignments(student_id, course_id) print(x) # print(json.dumps(x,indent=2)) #testme() #exit() def get_student_course_grades(student_id, course_id): results = {} # Course details (final grade) submission_params = { "student_ids[]": [f"user_{student_id}"], "include[]": ["assignment"] } user_url = f"{url}/api/v1/courses/{course_id}/enrollments" enrollments = user_course_enrollment(student_id, course_id) if not enrollments or 'errors' in enrollments: final_score = -1 final_grade = -1 else: final_score = enrollments[0].get("grades", {}).get("current_score", -1) final_grade = enrollments[0].get("grades", {}).get("current_grade", -1) # Assignment group mapping ag_url = f"{url}/api/v1/courses/{course_id}/assignment_groups" ag_list = fetch(ag_url) try: group_lookup = {ag["id"]: ag["name"] for ag in ag_list} except: print("groups lookup failed") # Submissions submissions_url = f"{url}/api/v1/courses/{course_id}/students/submissions" submissions = fetch(submissions_url, params=submission_params) assignments = [] for sub in submissions: try: assignment = sub.get("assignment", {}) group_id = assignment.get("assignment_group_id") group = group_lookup.get(group_id, "Uncategorized") assignments.append({ "assignment_name": assignment.get("name"), "category": group, "due_at": assignment.get("due_at"), "points_possible": assignment.get("points_possible"), "score": sub.get("score"), "submitted": sub.get("submitted_at") is not None, "graded": sub.get("graded_at") is not None or sub.get("score") is not None, "late": sub.get("late", False), "missing": sub.get("missing", False), "excused": sub.get("excused", False) }) except: print("failed to get assignment submissions") assignments = [] results = { #"course_code": course_name, "final_score": final_score, "final_grade": final_grade, "assignments": assignments } return results def summarize_student_logs(id=0): import pandas as pd import re import json from collections import defaultdict from localcache2 import course_from_id if id==0: id = input("student id> ") # Load CSV df = pd.read_csv(f"cache/users/logs/{id}.csv", parse_dates=["updated_at"]) # Extract course_id from URL df["course_id"] = 
df["url"].str.extract(r"/courses/(\d+)") df = df.dropna(subset=["course_id"]) df["course_id"] = df["course_id"].astype(int) # Convert 'participated' to boolean (robustly) df["participated"] = df["participated"].astype(str).str.lower() == "true" # Truncate to date only df["date"] = df["updated_at"].dt.date # Group by course course_summaries = {} for course_id, group in df.groupby("course_id"): course_data = {} try: this_course = course_from_id(course_id) course_data['course_code'] = this_course['course_code'] except Exception as e: print(f"didn't find course {course_id}, {e}") course_data["first_seen"] = str(group["date"].min()) course_data["last_seen"] = str(group["date"].max()) # Histogram of daily hits daily_counts = group["date"].value_counts().sort_index() course_data["daily_hits"] = {str(k): int(v) for k, v in daily_counts.items()} # Participation count course_data["participated_count"] = int(group["participated"].sum()) course_summaries[str(course_id)] = course_data course_summaries[str(course_id)]['grades'] = get_student_course_grades(id, course_id) # App name tally app_name_counts = df["app_name"].fillna("None").value_counts().to_dict() # Final output output = { "courses": course_summaries, "app_name_counts": app_name_counts } # Write to JSON file with open(f"cache/users/logs/{id}_summary.json", "w") as f: json.dump(output, f, indent=2) print(f"Done. Output written to cache/users/logs/{id}_summary.json") def readable_user_summary(): import re import ast import math from datetime import timedelta from zoneinfo import ZoneInfo import pandas as pd COURSE_RE = re.compile(r"/courses/(\d+)(?:/|$)") def extract_course_id(url, links_str) : """Return Canvas course id from URL (/courses/) or from links.context.""" if url: m = COURSE_RE.search(url) if m: return int(m.group(1)) if links_str: try: d = ast.literal_eval(links_str) ctx = d.get("context") if isinstance(ctx, int): return ctx except Exception: pass return None id = input("user id? ") csv_path = f"cache/users/logs/{id}.csv" gap_minutes = 10 tz_name = "America/Los_Angeles" # Load df = pd.read_csv(csv_path) # Extract course_id df["course_id"] = [ extract_course_id(u, l) for u, l in zip(df.get("url"), df.get("links")) ] # Keep only rows with a course_id df = df.dropna(subset=["course_id"]).copy() df["course_id"] = df["course_id"].astype(int) # Parse times (UTC) and convert to local Pacific time (user asked for “Pacific standard time”; # we’ll convert to America/Los_Angeles which accounts for DST automatically). 
df["ts_utc"] = pd.to_datetime(df["created_at"], utc=True, errors="coerce") df = df.dropna(subset=["ts_utc"]).copy() local_tz = ZoneInfo(tz_name) df["ts_local"] = df["ts_utc"].dt.tz_convert(local_tz) # Sort for gap detection df = df.sort_values(["course_id", "session_id", "ts_local"]) # Session splitting: new session if gap > gap_minutes within same course+session_id gap = pd.Timedelta(minutes=gap_minutes) # Identify gap starts within each course+session_id stream df["gap_new"] = ( df.groupby(["course_id", "session_id"])["ts_local"] .diff() .gt(gap) .fillna(True) # first row starts a session ) # Session index within each course+session_id df["session_idx"] = df.groupby(["course_id", "session_id"])["gap_new"].cumsum() # Session key across all data df["session_key"] = list(zip(df["course_id"], df["session_id"], df["session_idx"])) # Aggregate to sessions agg = ( df.groupby("session_key") .agg(course_id=("course_id", "first"), start=("ts_local", "min"), end=("ts_local", "max"), hits=("ts_local", "size")) .reset_index(drop=True) ) # Duration in minutes (ceil; minimum 1 minute) dur_secs = (agg["end"] - agg["start"]).dt.total_seconds().clip(lower=0) agg["mins"] = [max(1, math.ceil(s / 60)) for s in dur_secs] # Group by local calendar day for reporting agg["day"] = agg["start"].dt.strftime("%b %-d") # e.g., "Oct 27" # Format start time like "5:05pm" agg["start_str"] = agg["start"].dt.strftime("%-I:%M%p").str.lower() # Order: day, then chronological within day agg = agg.sort_values(["start"]) # Group by calendar date only agg["day_key"] = agg["start"].dt.date # Build text report with nice formatting lines: list[str] = [] for day_key, day_df in agg.groupby("day_key", sort=False): # Pretty date for the section header # Using the first session time for that day to include time first_time = day_df["start"].min() # linux # pretty_day = first_time.strftime("%b %-d %-I:%M%p").lower() # e.g., "Sep 26 8:02pm" # windows pretty_day = first_time.strftime("%b %#d %#I:%M%p").lower() lines.append(f"{pretty_day}:") for _, row in day_df.iterrows(): lines.append( f" - course {row.course_id}, {row.start_str}: " f"{row.hits} hits over {row.mins} minutes" ) report_out = codecs.open(f"cache/users/logs/{id}_report.txt", "w", "utf-8") report_out.write( "\n".join(lines) ) return "\n".join(lines) if lines else "No course activity found." 
# Example usage:
#   print(generate_course_report("canvas_logs.csv", gap_minutes=10))

if __name__ == "__main__":
    print("")

    options = {
        1:  ['Fetch iLearn users with @gavilan.edu email address', teacherRolesUpdateCache],
        2:  ['Fetch all users', fetchAllUsers],
        5:  ['Download user avatars', downloadPhoto],
        6:  ['Merge photo folders', mergePhotoFolders],
        7:  ['Get all teachers logs 1 month', get_recent_views],
        8:  ['Gather teacher history, a variety of stats.', getTeachersInfoMain],
        9:  ['test rtr.', read_training_records],
        10: ["Get a user's logs", track_user],
        11: ["Tally a user's logs from csv file", count_courses],
        #11: ['test: oneYearSchedule', oneYearSchedule],
        12: ['summarize hit activity', activity_summary],
        13: ['Get all users logs in a class', track_users_in_class],
        14: ['Get logs for 1 user', track_a_user],
        15: ['Get all users logs in a semester', track_users_in_sem],
        16: ['Report on attendance for all classes', track_users_by_teacherclass],
        17: ['Show all convos for a user', convos_for_user],
        27: ['Show all pushed notifications for a user', comm_mssgs_for_user],
        18: ['Quiz submissions', quiz_submissions],
        #19: ['NLP Sample', nlp_sample],
        20: ['Enroll a single user into a class', one_course_enrol],
        21: ['Enroll a student into a section', section_enroll],
        22: ['Teachers new this semester', find_new_teachers],
        #22: ['Sync personnel and conference user databases', user_db_sync],
        23: ['Find non-gnumbers', find_no_goo],
        24: ['compare user tables', compare_db_tables],
        25: ['cross ref training', cross_ref_training],
        26: ['find goo numbers in training spreadsheet', training_find_goos],
        30: ['get portfolios for user id', get_portfolios],
        31: ['get portfolio pages for portfolio id', get_port_pages],
        40: ['reset user email without confirmation', set_email_skip_confirm],
        41: ["summarize user's logs", summarize_student_logs],
        50: ["summarize user's logs 2", readable_user_summary],
        #3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
        #5: ['Match names in schedule & ilearn', match_usernames],
        #6: ['Create Dept\'s ZTC list', create_ztc_list],
        ##7: ['Build and send ZTC emails', send_ztc_mails],
        #14: ['investigate the logs', investigate_logs],
        #12: ['test: match_usernames', match_usernames],
        #13: ['test: get all names', getAllNames],
        #13: ['x', users_with_gavilan_email],
    }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    # Call the selected function from the options dict
    options[int(resp)][1]()
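# Usage note (the file name below is an assumption; substitute the actual module name):
#
#   python users.py          # interactive: prints the numbered menu and asks "Choose: "
#   python users.py 25       # runs option 25 ("cross ref training") without prompting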