Compare commits

...

2 Commits

Author SHA1 Message Date
phowell d66217ec63 flex day helper 2024-07-22 15:20:20 -07:00
phowell 6da470ad1f summer 24 changes 2024-07-22 15:19:31 -07:00
8 changed files with 674 additions and 260 deletions

View File

@ -158,6 +158,7 @@ def accessible_check(id=""):
if not id:
id = input("ID of course to check? ")
verbose = 1
PAGES_ONLY = 1
save_file_types = ['application/pdf','application/docx','image/jpg','image/png','image/gif','image/webp','application/vnd.openxmlformats-officedocument.wordprocessingml.document']
@ -232,36 +233,37 @@ def accessible_check(id=""):
###
### FILES
###
files_f = course_folder + '/files'
headered = 0
print("\nFILES")
try:
os.mkdir(files_f)
except:
print(" * Files folder already exists.")
if not PAGES_ONLY:
files_f = course_folder + '/files'
headered = 0
print("\nFILES")
try:
os.mkdir(files_f)
except:
print(" * Files folder already exists.")
files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
print("LISTING COURSE FILES")
for f in files:
for arg in 'filename,content-type,size,url'.split(','):
if arg=='size':
f['size'] = str(int(f['size']) / 1000) + 'k'
files = fetch('/api/v1/courses/' + str(id) + '/files', verbose)
print("LISTING COURSE FILES")
for f in files:
for arg in 'filename,content-type,size,url'.split(','):
if arg=='size':
f['size'] = str(int(f['size']) / 1000) + 'k'
if f['content-type'] in save_file_types:
d(' - %s' % f['filename'])
if f['content-type'] in save_file_types:
d(' - %s' % f['filename'])
if not os.path.exists(files_f + '/' + f['filename']):
r = requests.get(f['url'],headers=header, stream=True)
with open(files_f + '/' + f['filename'], 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
else:
d(" - already downloaded %s" % files_f + '/' + f['filename'])
if not os.path.exists(files_f + '/' + f['filename']):
r = requests.get(f['url'],headers=header, stream=True)
with open(files_f + '/' + f['filename'], 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
else:
d(" - already downloaded %s" % files_f + '/' + f['filename'])
if not headered:
index.append( ('<br /><b>Files</b><br />') )
headered = 1
index.append( ('files/' + f['filename'], f['filename']) )
if not headered:
index.append( ('<br /><b>Files</b><br />') )
headered = 1
index.append( ('files/' + f['filename'], f['filename']) )
###
### PAGES
@ -295,8 +297,9 @@ def accessible_check(id=""):
if os.path.exists(this_page_filename):
d(" - already downloaded %s" % this_page_filename)
this_page_content = codecs.open(this_page_filename,'r','utf-8').read()
elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
d(' * skipping file behind passwords')
#elif re.search(r'eis-prod',p['url']) or re.search(r'gavilan\.ins',p['url']):
#elif re.search(r'eis-prod',p['url']):
# d(' * skipping file behind passwords')
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/pages/'+p['url'], verbose)
if t2 and 'body' in t2 and t2['body']:
@ -314,22 +317,22 @@ def accessible_check(id=""):
src = I.get('src')
if src:
d(' - %s' % src)
if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src):
d(' * skipping file behind passwords')
else:
try:
r = requests.get(src,headers=header, stream=True)
mytype = r.headers['content-type']
#print("Response is type: " + str(mytype))
r_parts = mytype.split("/")
ending = r_parts[-1]
#if re.search(r'eis-prod', src) or re.search(r'gavilan\.ins', src):
# d(' * skipping file behind passwords')
#else:
try:
r = requests.get(src,headers=header, stream=True)
mytype = r.headers['content-type']
#print("Response is type: " + str(mytype))
r_parts = mytype.split("/")
ending = r_parts[-1]
with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
image_count += 1
except Exception as e:
d( ' * Error downloading page image, %s' % str(e) )
with open(pages_f + '/' + str(image_count) + "." + ending, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
image_count += 1
except Exception as e:
d( ' * Error downloading page image, %s' % str(e) )
try:
with codecs.open(this_page_filename, 'w','utf-8') as fd:
@ -350,78 +353,80 @@ def accessible_check(id=""):
###
### ASSIGNMENTS
###
headered = 0
asm_f = course_folder + '/assignments'
print("\nASSIGNMENTS")
try:
os.mkdir(asm_f)
except:
d(" - Assignments dir exists")
asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
for p in asm:
d(' - %s' % p['name'])
if not PAGES_ONLY:
headered = 0
asm_f = course_folder + '/assignments'
print("\nASSIGNMENTS")
try:
friendlyfile = to_file_friendly(p['name'])
this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
if os.path.exists(this_assmt_filename):
d(" - already downloaded %s" % this_assmt_filename)
this_assmt_content = open(this_assmt_filename,'r').read()
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
this_assmt_content = "<h2>%s</h2>\n%s\n\n" % (t2['name'], t2['description'])
fd.write(this_assmt_content)
if not headered:
index.append( ('<br /><b>Assignments</b><br />') )
headered = 1
index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
os.mkdir(asm_f)
except:
d(" - Assignments dir exists")
# write to running log of content in order of module
if p['id'] in item_id_to_index:
items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak
except Exception as e:
d(' * Problem %s' % str(e))
asm = fetch('/api/v1/courses/' + str(id) + '/assignments', verbose)
for p in asm:
d(' - %s' % p['name'])
###
### FORUMS
###
index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) )
try:
friendlyfile = to_file_friendly(p['name'])
this_assmt_filename = asm_f + '/' + str(p['id'])+"_"+ friendlyfile + '.html'
if os.path.exists(this_assmt_filename):
d(" - already downloaded %s" % this_assmt_filename)
this_assmt_content = open(this_assmt_filename,'r').read()
else:
t2 = fetch('/api/v1/courses/' + str(id) + '/assignments/'+str(p['id']), verbose)
with codecs.open(this_assmt_filename, 'w','utf-8') as fd:
this_assmt_content = "<h2>%s</h2>\n%s\n\n" % (t2['name'], t2['description'])
fd.write(this_assmt_content)
if not headered:
index.append( ('<br /><b>Assignments</b><br />') )
headered = 1
index.append( ('assignments/' + str(p['id'])+"_"+friendlyfile + '.html', p['name']) )
"""
# write to running log of content in order of module
if p['id'] in item_id_to_index:
items[ item_id_to_index[ p['url'] ] ] = this_assmt_content+'\n\n'+pagebreak
except Exception as e:
d(' * Problem %s' % str(e))
###
### FORUMS
###
index.extend( extract_forums(id, course_folder, items_inorder, item_id_to_index, verbose) )
"""
###
### QUIZZES
###
###
### QUIZZES
###
# get a list of external urls
headered = 0
t = url + '/api/v1/courses/' + str(id) + '/modules'
while t: t = fetch(t)
mods = results
results = []
for m in mods:
# get a list of external urls
headered = 0
t = url + '/api/v1/courses/' + str(id) + '/modules'
while t: t = fetch(t)
mods = results
results = []
t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
while t2: t2 = fetch(t2)
items = results
for i in items:
#print i
if i['type'] == "ExternalUrl":
for m in mods:
results = []
t2 = url + '/api/v1/courses/' + str(id) + '/modules/' + str(m['id']) + '/items'
while t2: t2 = fetch(t2)
items = results
for i in items:
#print i
for j in 'id,title,external_url'.split(','):
print unicode(i[j]), "\t",
print ""
if not headered: index.append( ('<br /><b>External Links</b><br />') )
headered = 1
index.append( (i['external_url'], i['title']) )
"""
if i['type'] == "ExternalUrl":
#print i
for j in 'id,title,external_url'.split(','):
print unicode(i[j]), "\t",
print ""
if not headered: index.append( ('<br /><b>External Links</b><br />') )
headered = 1
index.append( (i['external_url'], i['title']) )
"""

View File

@ -1,9 +1,11 @@
from ast import Try
import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd
from datetime import datetime
import pytz
from dateutil import parser
from datetime import datetime
#from symbol import try_stmt
from util import print_table, int_or_zero, float_or_zero, dept_from_name, num_from_name
from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
from pipelines import sems
@ -971,12 +973,16 @@ def enroll_id_list_to_shell(id_list, shell_id, v=0):
print("Something went wrong with id %s, %s, %s" % (j, str(s), str(e)))
# multiple semesters
def enroll_stem_students_live():
semesters = [183,184]
for S in semesters:
enroll_stem_students_live_semester(S)
def enroll_stem_students_live_semester(the_term, do_removes=0):
import localcache2
the_term = '181' # su23 fa23 = 180
do_removes = 0
depts = "MATH BIO CHEM CSIS PHYS PSCI GEOG ASTR ECOL ENVS ENGR".split(" ")
users_to_enroll = users_in_by_depts_live(depts, the_term) # term id
@ -1317,23 +1323,15 @@ def course_search_by_sis():
def course_by_depts_terms(section=0):
"""s = [ x.strip() for x in codecs.open('cache/fa22_eval_sections.csv','r').readlines()]
s = list(funcy.flatten(s))
s.sort()
xyz = input('hit return to continue')
"""
#c = getCoursesInTerm(168,0,1)
#c = getCoursesInTerm(174,0,1) # sp22
#c = getCoursesInTerm(176,0,1) # fa22
get_fresh = 1
SP_TERM = 181
WI_TERM = 182
SEM = "sp24"
get_fresh = 0
#SP_TERM = 181
#WI_TERM = 182
TERM = 183
SEM = "su24"
make_changes = 1
make_changes_LS = 1
do_all = 0
winter_start_day = 2
aviation_start_day = 11
@ -1341,16 +1339,20 @@ def course_by_depts_terms(section=0):
spring_start_day = 29
if get_fresh:
c = getCoursesInTerm(SP_TERM,0,0)
codecs.open(f'cache/courses_in_term_{SP_TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
print(f"Getting list of courses in {SEM}")
c = getCoursesInTerm(TERM,0,0)
codecs.open(f'cache/courses_in_term_{TERM}.json','w','utf-8').write(json.dumps(c,indent=2))
else:
c = json.loads( codecs.open(f'cache/courses_in_term_{SP_TERM}.json','r','utf-8').read() )
c = json.loads( codecs.open(f'cache/courses_in_term_{TERM}.json','r','utf-8').read() )
crn_to_canvasid = {}
for C in c:
#print(C['name'])
if 'sis_course_id' in C and C['sis_course_id']:
print( f"{C['name']} -> {C['sis_course_id'][7:13]}" )
crn_to_canvasid[C['sis_course_id'][7:13]] = str(C['id'])
else:
print( f"---NO CRN IN: {C['name']} -> {C}" )
#print(crn_to_canvasid)
#return
@ -1361,39 +1363,60 @@ def course_by_depts_terms(section=0):
start = re.sub( r'\-','/', S['start']) + '/20' + SEM[2:4]
d_start = datetime.strptime(start,"%m/%d/%Y")
if d_start.month > 5:
print("Ignoring ", d_start, " starting too late...")
try:
this_id = crn_to_canvasid[S['crn']]
except Exception as e:
print(f"DIDN'T FIND CRN - {start} {d_start} - {S['code']} {S['crn']} {S['name']}" )
continue
if d_start.month == 1 and d_start.day == aviation_start_day:
print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] )
continue
print(f" - {start} {d_start} - id: {this_id} - {S['code']} {S['crn']} {S['name']}" )
if 1:
if d_start.month < 5 or d_start.month > 7:
print(f" Ignoring {d_start}, starting too far away...")
continue
if d_start.month == 1 and d_start.day == nursing_start_day:
print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] )
continue
#if d_start.month == 1 and d_start.day == aviation_start_day:
# print("- Aviation ", start, d_start, " - ", S['code'], " ", S['crn'] )
# continue
if d_start.month == 1 and d_start.day == winter_start_day:
print("+ winter session: ", d_start, " - ", S['code'])
data = {'course[term_id]':WI_TERM}
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
if make_changes:
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
#print(r3.text)
continue
#if d_start.month == 1 and d_start.day == nursing_start_day:
# print("- Nursing ", start, d_start, " - ", S['code'], " ", S['crn'] )
# continue
if d_start.month == 1 and d_start.day == spring_start_day:
# normal class
continue
if d_start.month == 5 and d_start.day == 28:
print(" Ignoring, term start date" )
continue
else:
print(" Adjust course start day?")
if make_changes:
if do_all != 'a':
do_all = input(' -> adjust? [enter] for yes, [a] to do all remaining. [n] to quit. >')
if do_all == 'n':
exit()
if do_all == '' or do_all == 'a':
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = f"https://gavilan.instructure.com:443/api/v1/courses/{this_id}"
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
"""if d_start.month == 1 and d_start.day == winter_start_day:
print("+ winter session: ", d_start, " - ", S['code'])
data = {'course[term_id]':WI_TERM}
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
if make_changes:
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
#print(r3.text)
continue"""
#if d_start.month == 1 and d_start.day == spring_start_day:
# # normal class
# continue
print("- Late start? ", start, d_start, " - ", S['code'], " ", S['crn'] )
if make_changes_LS:
data = {'course[start_at]':d_start.isoformat(), 'course[restrict_student_future_view]': True,
'course[restrict_enrollments_to_course_dates]':True }
u2 = "https://gavilan.instructure.com:443/api/v1/courses/%s" % crn_to_canvasid[S['crn']]
r3 = requests.put(u2, headers=header, params=data)
print(" updated.. OK")
return

flexday.py (new file, 212 lines)
View File

@ -0,0 +1,212 @@
import funcy, codecs, json, sys, csv, re
from pipelines import fetch   # assumption: fetch() used below comes from pipelines, as in the other scripts here
def user_db_sync():
# currently in db
conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
for e in ilrn_emails:
if not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
def user_db_sync2():
#fetch all personnel dir entries from dir_api.php. PERSL unique emails
persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
persl_emails = set([x.lower() for x in funcy.pluck('email',persl)])
#persl_ids = set([x.lower() for x in funcy.pluck('email',persl)])
#
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
#
#fetch all conf_users from dir_api.php CONUSR unique emails
conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all gavi_personnel_ext from dir_api.php. GPEREXT must have the column 'personnel' or 'c_users', or both.
gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
all_emails = set(persl_emails)
all_emails.update(ilrn_emails)
all_emails.update(conusr_emails)
all_emails = list(all_emails)
all_emails.sort()
fout = codecs.open('cache/db_staff_report.csv','w','utf-8')
fout.write('email,personnel_dir,ilearn,conf_user\n')
for e in all_emails:
if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
# goo (minus the G00 prefix), email, and name go into conf_users
fout.write(e+',')
if e in persl_emails:
fout.write('1,')
else:
fout.write('0,')
if e in ilrn_emails:
fout.write('1,')
else:
fout.write('0,')
if e in conusr_emails:
fout.write('1,')
else:
fout.write('0,')
fout.write('\n')
fout.close()
#
#print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) )
print('done')
def get_best_user_record(rec_list):
# rule: the lowest id is used, unless it appears in the exceptions list below
# keys should be replaced with their values; these don't follow the typical lowest-id rule
exceptions = { 120: 883, # Gary Burce
538: 955, # Ronna de benedetti
127: 957, # Mia Cabello
802: 963, # binh vo
1053: 963,
923: 971, # brianna aguilar
933: 970, # elif konus
473: 879, # tania maheu
}
# sort records by id
s_recs = sorted(rec_list, key=sort_id)
preferred = s_recs[0]
# check for exceptions
if int(preferred['id']) in exceptions:
new_preferred_id = exceptions[int(preferred['id'])]
for r in rec_list:
if int(r['id']) == new_preferred_id:
preferred = r
break
return preferred
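# Worked example for get_best_user_record (illustrative, not in the original code):
# for Gary Burce's duplicate rows, ids 120 and 883, the lowest-id rule alone would
# pick 120, but the exceptions map above overrides the preferred record to 883.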
# Get dup rows like this:
# SELECT * FROM conf_users
# WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
# ORDER BY goo;
def correct_dup_user_rows():
'''
Fixing the bad conf_users rows because the intranet1 SSO started changing how it returned the accounts:
- email is either with @gavilan.edu or without, or with @my.gavilan.edu
- but goo is correct
1. change login functions to look up GOO in conf_users
- still add new row if not present
2. Find dups
a. get lowest id (L), that is the correct one
b. for higher id (H), replace H with L in: conf_signups.user, conf_answers.user, conf_hosts.host, conf_logs <- abandoned gavi_logs <-- can't really
3. AND make a big overview page or report for all users/all years so I can check that records are complete
- person
- year or semester (conferences table)
- their signups, hostings
- their 'attended' status and/or comments
'''
fname = 'cache/conf_users_dups.csv'
with open(fname, 'r') as f:
reader = csv.DictReader(f)
data = list(reader)
#print(data)
pairs = funcy.group_by(lambda r: r['goo'], data)
#print(json.dumps(pairs,indent=2))
counter = 0
for goo,recs in pairs.items():
if goo == "0":
continue # skip fake user
counter += 1
#emails = funcy.pluck('email',recs)
#print(list(emails))
#ids = funcy.pluck('id',recs)
#print(list(ids))
s_recs = sorted(recs, key=sort_id)
preferred = get_best_user_record(s_recs)
if 1:
for i,rec in enumerate(s_recs):
col1 = " "
if rec == preferred: col1 = " * "
# print(f"-- {col1} \t {rec['id']} \t {rec['goo']} \t {rec['email']} \t {rec['name']}")
s_recs.remove(preferred)
# Now loop through the non-preferred records, and update tables
for NP in s_recs:
#print(f"I want to remove conf_user id {NP['id']}")
print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
# SELECT * FROM conf_answers where user=1142
# SELECT * FROM conf_hosts where host=1142
#print(f"{s_recs[0]['email']} - lowest id: {s_recs[0]['id']}- {len(s_recs)} records")
#print()
#print(f"Total dups: {counter}")
def sort_id(a):
return int(a['id'])
if __name__ == "__main__":
print ("")
options = { 1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2] ,
2: ['generate sql to fix conf_user dups', correct_dup_user_rows] ,
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
resp = int(sys.argv[1])
print("\n\nPerforming: %s\n\n" % options[resp][0])
else:
print ('')
for key in options:
print(str(key) + '.\t' + options[key][0])
print('')
resp = input('Choose: ')
# Call the function in the options dict
options[ int(resp)][1]()

View File

@ -341,6 +341,8 @@ def create_schedule_table_if_not_exists():
# Populate schedule table and correlate to courses table
def courses_to_sched():
# TODO: fix units when they are variable... change to a float within the range, rounded to the nearest 0.5 unit (sketched below).
EXECUTE = 1
seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
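A minimal sketch (not part of this commit) of what that TODO might look like, assuming variable units arrive as range strings like '1-3' or '1/3', the same patterns the schedule SQL later in this diff searches for; normalize_units is a hypothetical helper name.

import re

def normalize_units(units):
    # Midpoint of a variable range ('1-3' or '1/3'), rounded to the nearest
    # 0.5 unit; plain numeric values pass through as floats.
    m = re.match(r'^\s*(\d+(?:\.\d+)?)\s*[-/]\s*(\d+(?:\.\d+)?)\s*$', str(units))
    if m:
        lo, hi = float(m.group(1)), float(m.group(2))
        return round((lo + hi) / 2 * 2) / 2.0
    return float(units)

# e.g. normalize_units('1-3') -> 2.0, normalize_units('1/4') -> 2.5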

View File

@ -30,7 +30,7 @@ from path_dict import PathDict
outputfile = ''
csvwriter = ''
TERM = 181
TERM = 183
def escape_commas(s):
@ -149,40 +149,43 @@ def ilearn_shell_slo_to_csv(shell_slos):
L.append("o%i_assd" % i)
df = pd.DataFrame(columns=L)
for S in shell_slos:
short = S[0]
this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, }
if len(S)>1:
full = S[1]
this_crs['has_outcomes'] = 1
try:
short = S[0]
this_crs = {'canvasid':short['ilearnid'], 'name':short['ilearnname'], 'has_outcomes':0, }
if len(S)>1:
full = S[1]
this_crs['has_outcomes'] = 1
i = 1
i = 1
for o in full['outcomes']:
try:
this_id = int(o['outcome']['id'])
this_crs['o%i_id' % i] = o['outcome']['id']
except Exception as e:
this_crs['o%i_id' % i] = '!'
try:
this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description']
except Exception as e:
this_crs['o%i_desc' % i] = '!'
try:
assessed = 0
if full['full_outcomes'][this_id]['assessed'] == 'True':
assessed = 1
this_crs['o%i_assd' % i] = assessed
except Exception as e:
this_crs['o%i_assd' % i] = '!'
try:
this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid']
except Exception as e:
this_crs['o%i_vendor_guid' % i] = '!'
for o in full['outcomes']:
try:
this_id = int(o['outcome']['id'])
this_crs['o%i_id' % i] = o['outcome']['id']
except Exception as e:
this_crs['o%i_id' % i] = '!'
try:
this_crs['o%i_desc' % i] = full['full_outcomes'][this_id]['description']
except Exception as e:
this_crs['o%i_desc' % i] = '!'
try:
assessed = 0
if full['full_outcomes'][this_id]['assessed'] == 'True':
assessed = 1
this_crs['o%i_assd' % i] = assessed
except Exception as e:
this_crs['o%i_assd' % i] = '!'
try:
this_crs['o%i_vendor_guid' % i] = full['full_outcomes'][this_id]['vendor_guid']
except Exception as e:
this_crs['o%i_vendor_guid' % i] = '!'
i += 1
i += 1
df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0])
df = pd.concat( [df, df2], ignore_index = True )
df2 = pd.DataFrame(this_crs, columns = df.columns, index=[0])
df = pd.concat( [df, df2], ignore_index = True )
except Exception as e:
print(f"*** Exception {e} with {S}\n\n")
df.to_csv('cache/outcome.csv')
print(df)

stats.py (160 lines changed)
View File

@ -1393,6 +1393,11 @@ def report_student_stats():
# Save the figure in an HTML file
pio.write_html(fig, 'cache/student_pct_onlinecourse.html')
def test_rpy():
pass
'''
def test_rpy():
from rpy2 import robjects
from rpy2.robjects import Formula, Environment
@ -1439,8 +1444,162 @@ def test_rpy2():
utils = importr('utils')
pi = robjects.r['pi']
print(f"pi={pi[0]}")
'''
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import silhouette_score
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import graphviz
from joblib import dump, load
def cluster_by_mode_1():
# Load the data from a CSV file
data = pd.read_csv('cache/students_bymode.csv')
# Extract the relevant features
features = data[['num_semesters', 'num_units', 'inperson_units', 'hybrid_units', 'online_units']]
# Standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Perform clustering with different numbers of clusters
for n_clusters in range(4, 12):
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(scaled_features)
# Add the cluster labels to the original data
data[f'cluster_{n_clusters}'] = kmeans.labels_
print(f"Clustering with {n_clusters} clusters:")
print(data.groupby(f'cluster_{n_clusters}').size())
print()
# Save the updated data with cluster labels to a new CSV file
data.to_csv('cache/students_bymode_with_clusters_1.csv', index=False)
def cluster_by_mode():
data = pd.read_csv('cache/students_bymode.csv')
# Split features and target
X = data.drop('g_number', axis=1)
y = data['g_number']
# Train decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X, y)
# Visualize decision tree
dot_data = export_graphviz(clf, out_file=None,
feature_names=X.columns,
class_names=y.unique(),
filled=True, rounded=True,
special_characters=True)
graph = graphviz.Source(dot_data)
graph.render('decision_tree', view=True)
data.to_csv('cache/students_bymode_with_dt.csv', index=False)
def cluster_by_mode_2():
# Load the data from a CSV file
data = pd.read_csv('cache/students_bymode.csv')
# Extract the features (excluding the 'g_number' column)
features = data.drop('g_number', axis=1)
# Scale the features to have zero mean and unit variance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Determine the ideal number of clusters using the elbow method
inertias = []
for k in range(4, 40): # Try different values of k (here 4 through 39)
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(scaled_features)
inertias.append(kmeans.inertia_)
# Plot the elbow curve
import matplotlib.pyplot as plt
plt.plot(range(4, 40), inertias, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()
# Choose the ideal number of clusters based on the elbow curve
ideal_k = 12 # Adjust this based on your observation
# Perform clustering with the ideal number of clusters
kmeans = KMeans(n_clusters=ideal_k, random_state=42)
kmeans.fit(scaled_features)
# Get the cluster labels for each data point
labels = kmeans.labels_
# Add the cluster labels to the original data
data['Cluster'] = labels
# Save the cluster labels to a new CSV file
data.to_csv('cache/students_bymode_with_clusters_2.csv', index=False)
# Get the cluster centers (centroids)
centroids = scaler.inverse_transform(kmeans.cluster_centers_)
# Print the cluster centers
for i, centroid in enumerate(centroids):
print(f"Cluster {i} center:")
for feature, value in zip(features.columns, centroid):
print(f"{feature}: {value}")
print()
# Save the trained objects to files
dump(kmeans, 'kmeans.joblib')
dump(scaler, 'scaler.joblib')
# Load the saved objects for future use
loaded_kmeans = load('kmeans.joblib')
loaded_scaler = load('scaler.joblib')
# Use the loaded objects for predictions (note: new_data is not defined in this
# function; see cluster_with_new_data below for that step)
#new_data_scaled = loaded_scaler.transform(new_data)
#predictions = loaded_kmeans.predict(new_data_scaled)
def cluster_with_new_data():
## NOT TESTED
# reload the kmeans and scaler objects saved by the previous step
kmeans = load('kmeans.joblib')
scaler = load('scaler.joblib')
# Load the new data
new_data = pd.read_csv('new_data.csv')
# Extract the features from the new data
new_features = new_data.drop('g_number', axis=1)
# Scale the new features using the fitted scaler
scaled_new_features = scaler.transform(new_features)
# Predict the cluster labels for the new data
new_labels = kmeans.predict(scaled_new_features)
# Add the cluster labels to the new data
new_data['Cluster'] = new_labels
if __name__ == "__main__":
options = { 1: ['get all historical grades from ilearn',get_all] ,
2: ['process grades csv file',process_grades] ,
@ -1462,6 +1621,7 @@ if __name__ == "__main__":
30: ['visualize course modes multi semester', visualize_course_modes_multi_semester],
31: ['Report on student stats', report_student_stats],
32: ['test rpy', test_rpy],
33: ['cluster students by mode', cluster_by_mode],
}
print ('')

View File

@ -66,6 +66,14 @@ where (s.type='online' or s.type='hybrid' or s.type='online line')
and not cc.path='sstaff@gavilan.edu'
order by u.sortable_name;
-- names that are uppercase
SELECT *
FROM canvas.users
WHERE REGEXP_LIKE(name, '^[A-Z]+[[:space:]]')
order by last_logged_out ;
-- for outlook
select string_agg(distinct LOWER(cc.path), '; ') from canvas.courses c
@ -112,7 +120,12 @@ group by u.sortable_name
order by total desc, online desc, onlinelive desc, hybrid desc;
-- find (fix?) rows where units are variable
SELECT * FROM canvas.schedule
WHERE units LIKE '%-%';
SELECT * FROM canvas.schedule
WHERE units LIKE '%/%';
-- num units
select u.sortable_name, p.sis_user_id,
@ -145,6 +158,76 @@ order by total desc;
-- students.csv: each student, num_semesters, num_units, num_f2f, num_online, num_hybrid
SELECT
p.sis_user_id as g_number,
COUNT(DISTINCT s.sem) AS num_semesters,
SUM(s.units::FLOAT) AS num_units,
sum(CASE WHEN s.type = 'in-person' THEN s.units::FLOAT ELSE 0 end) AS inperson_units,
sum(CASE WHEN s.type = 'hybrid' THEN s.units::FLOAT ELSE 0 end) AS hybrid_units,
sum(CASE WHEN s.type = 'online' or s.type = 'online live' THEN s.units::FLOAT ELSE 0 end) AS online_units
FROM
canvas.users u
JOIN canvas.enrollments e ON u.id = e.user_id
JOIN canvas.courses c ON e.course_id = c.id
JOIN canvas.schedule s ON c.id = s.canvascourse
JOIN canvas.pseudonyms p ON u.id = p.user_id
WHERE
e.workflow_state = 'active'
AND e.type = 'StudentEnrollment'
AND u.id IN (
SELECT u.id FROM canvas.enrollments AS e
JOIN canvas.users AS u ON e.user_id=u.id
JOIN canvas.courses AS c ON e.course_id=c.id
WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%')
AND e.workflow_state='active'
AND e.type='StudentEnrollment'
GROUP BY u.id
)
GROUP BY
p.sis_user_id
ORDER BY
num_semesters, p.sis_user_id;
-- students.csv: each student, num_semesters, num_units
SELECT
p.sis_user_id as g_number,
COUNT(DISTINCT s.sem) AS num_semesters,
SUM(s.units::FLOAT) AS num_units
FROM
canvas.users u
JOIN canvas.enrollments e ON u.id = e.user_id
JOIN canvas.courses c ON e.course_id = c.id
JOIN canvas.schedule s ON c.id = s.canvascourse
JOIN canvas.pseudonyms p ON u.id = p.user_id
WHERE
e.workflow_state = 'active'
AND e.type = 'StudentEnrollment'
AND u.id IN (
SELECT u.id FROM canvas.enrollments AS e
JOIN canvas.users AS u ON e.user_id=u.id
JOIN canvas.courses AS c ON e.course_id=c.id
WHERE (c.sis_source_id LIKE '202450-%%' or c.sis_source_id LIKE '202470-%%')
AND e.workflow_state='active'
AND e.type='StudentEnrollment'
GROUP BY u.id
)
GROUP BY
p.sis_user_id
ORDER BY
num_semesters, p.sis_user_id;
-- each class
select u.sortable_name, c.course_code, s.type, s.units::FLOAT

View File

@ -1796,8 +1796,8 @@ def track_user(id=0,qid=0):
url_addition = ""
if 1: # hard code dates
start_date = "2023-08-01T00:00:00-07:00"
end_date = "2024-01-01T00:00:00-07:00"
start_date = "2024-01-01T00:00:00-07:00"
end_date = "2024-07-01T00:00:00-07:00"
url_addition = f"?start_time={start_date}&end_time={end_date}"
elif 'last_days_log' in info:
print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id']))
@ -2041,80 +2041,6 @@ def find_new_teachers():
for J in jj:
print( J['teacher'])
def user_db_sync():
# currently in db
conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
for e in ilrn_emails:
if not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
def user_db_sync2():
#fetch all personnel dir entries from dir_api.php. PERSL unique emails
persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
persl_emails = set([x.lower() for x in funcy.pluck('email',persl)])
#persl_ids = set([x.lower() for x in funcy.pluck('email',persl)])
#
#fetch all staff from ilearn ILRN unique emails
ilrn = json.loads(codecs.open("cache/ilearn_staff.json","r","utf-8").read())
ilrn_emails = set([x.lower() for x in funcy.pluck('email',ilrn)])
#
#fetch all conf_users from dir_api.php CONUSR unique emails
conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
conusr_emails = set([x.lower() for x in funcy.pluck('email',conusr)])
#fetch all gavi_personnel_ext from dir_api.php GPEREXT must have column 'personnel' or 'c_users' or both.
gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
all_emails = set(persl_emails)
all_emails.update(ilrn_emails)
all_emails.update(conusr_emails)
all_emails = list(all_emails)
all_emails.sort()
fout = codecs.open('cache/db_staff_report.csv','w','utf-8')
fout.write('email,personnel_dir,ilearn,conf_user\n')
for e in all_emails:
if e in ilrn_emails and not (e in conusr_emails) and e.endswith('@gavilan.edu'):
E = funcy.first(funcy.where(ilrn,email=e))
goo = E['login_id'][3:]
#print("not in conf_user: %s \t %s \t %s" % (e,E['short_name'], E['login_id']) )
print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo,e,E['short_name']) )
# goo (minus G00) email, and name go into conf_users
fout.write(e+',')
if e in persl_emails:
fout.write('1,')
else:
fout.write('0,')
if e in ilrn_emails:
fout.write('1,')
else:
fout.write('0,')
if e in conusr_emails:
fout.write('1,')
else:
fout.write('0,')
fout.write('\n')
fout.close()
#
#print( json.dumps( [persl,ilrn,conusr,gperext], indent=2 ) )
print('done')
import traceback