# canvasapp/flexday.py

import funcy, codecs, json, sys, csv, re, requests
from localcache2 import user_from_goo
from canvas_secrets import url
from pipelines import fetch
from users import getEmail
import util
def user_db_sync():
    # currently in db
    conusr = fetch("http://deep1:8080/dir_api.php?users=1")
    conusr_emails = set([x.lower() for x in funcy.pluck('email', conusr)])
    # fetch all staff from iLearn; ILRN unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set([x.lower() for x in funcy.pluck('email', ilrn)])
    for e in ilrn_emails:
        if e not in conusr_emails and e.endswith('@gavilan.edu'):
            E = funcy.first(funcy.where(ilrn, email=e))
            goo = E['login_id'][3:]
            #print("not in conf_user: %s \t %s \t %s" % (e, E['short_name'], E['login_id']))
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))
# No longer relevant because we don't use the personnel table anymore.
def user_db_sync2():
    # fetch all personnel dir entries from dir_api.php; PERSL unique emails
    persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
    persl_emails = set([x.lower() for x in funcy.pluck('email', persl)])
    # fetch all staff from iLearn; ILRN unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set([x.lower() for x in funcy.pluck('email', ilrn)])
    # fetch all conf_users from dir_api.php; CONUSR unique emails
    conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
    conusr_emails = set([x.lower() for x in funcy.pluck('email', conusr)])
    # fetch all gavi_personnel_ext from dir_api.php; GPEREXT must have column 'personnel' or 'c_users' or both
    gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
    all_emails = sorted(persl_emails | ilrn_emails | conusr_emails)
    fout = codecs.open('cache/db_staff_report.csv', 'w', 'utf-8')
    fout.write('email,personnel_dir,ilearn,conf_user\n')
    for e in all_emails:
        if e in ilrn_emails and e not in conusr_emails and e.endswith('@gavilan.edu'):
            E = funcy.first(funcy.where(ilrn, email=e))
            # goo (minus the 'G00' prefix), email, and name go into conf_users
            goo = E['login_id'][3:]
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))
        fout.write(e + ',')
        fout.write('1,' if e in persl_emails else '0,')
        fout.write('1,' if e in ilrn_emails else '0,')
        fout.write('1' if e in conusr_emails else '0')
        fout.write('\n')
    fout.close()
    #print(json.dumps([persl, ilrn, conusr, gperext], indent=2))
    print('done')
def get_best_user_record(rec_list):
    # Rule: the record with the lowest id is used, unless that id appears in
    # the exceptions map below. For those, the key id is replaced by the
    # value id; they don't follow the typical lowest-id rule.
    exceptions = { 120: 883,    # Gary Burce
                   538: 955,    # Ronna de Benedetti
                   127: 957,    # Mia Cabello
                   802: 963,    # Binh Vo
                   1053: 963,
                   923: 971,    # Brianna Aguilar
                   933: 970,    # Elif Konus
                   473: 879,    # Tania Maheu
                 }
    # sort records by id
    s_recs = sorted(rec_list, key=sort_id)
    preferred = s_recs[0]
    # check for exceptions
    if int(preferred['id']) in exceptions:
        new_preferred_id = exceptions[int(preferred['id'])]
        for r in rec_list:
            if int(r['id']) == new_preferred_id:
                preferred = r
                break
    return preferred
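# A minimal usage sketch for get_best_user_record. The record dicts are
# hypothetical but mirror the conf_users columns used above (id, goo, email).
# Not called anywhere by default.
def _example_get_best_user_record():
    recs = [{'id': '120', 'goo': '138124', 'email': 'gburce@gavilan.edu'},
            {'id': '883', 'goo': '138124', 'email': 'gburce@my.gavilan.edu'}]
    best = get_best_user_record(recs)
    print(best['id'])  # 120 sorts lowest, but the exceptions map redirects 120 -> 883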
# Get dup rows like this:
# SELECT * FROM conf_users
# WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
# ORDER BY goo;
def correct_dup_user_rows():
    '''
    Fix the bad conf_users rows created when the intranet1 SSO started
    changing how it returned accounts:
    - email comes back with @gavilan.edu, with no domain, or with @my.gavilan.edu
    - but goo is correct
    1. change login functions to look up GOO in conf_users
       - still add a new row if not present
    2. Find dups
       a. get the lowest id (L); that is the correct one
       b. for each higher id (H), replace H with L in: conf_signups.user,
          conf_answers.user, conf_hosts.host, conf_logs <- abandoned gavi_logs <-- can't really
    3. AND make a big overview page or report for all users/all years so I can
       check that records are complete
       - person
       - year or semester (conferences table)
       - their signups, hostings
       - their 'attended' and/or comments
    '''
    fname = 'cache/conf_users_dups.csv'
    with open(fname, 'r') as f:
        reader = csv.DictReader(f)
        data = list(reader)
    pairs = funcy.group_by(lambda r: r['goo'], data)
    counter = 0
    for goo, recs in pairs.items():
        if goo == "0":
            continue  # skip the fake user
        counter += 1
        s_recs = sorted(recs, key=sort_id)
        preferred = get_best_user_record(s_recs)
        # debug listing of the group, with the preferred record starred:
        #for i, rec in enumerate(s_recs):
        #    col1 = " * " if rec == preferred else "   "
        #    print(f"-- {col1} \t {rec['id']} \t {rec['goo']} \t {rec['email']} \t {rec['name']}")
        s_recs.remove(preferred)
        # Loop through the non-preferred records and repoint/delete their rows
        for NP in s_recs:
            print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
            print(f"DELETE FROM conf_users WHERE id={NP['id']};")
    # SELECT * FROM conf_answers WHERE user=1142
    # SELECT * FROM conf_hosts WHERE host=1142
    #print(f"Total dups: {counter}")
def sort_id(a):
    return int(a['id'])
def search_user(searchterm=''):
    if not searchterm:
        searchterm = input('search term: ')
    u = url + f"/api/v1/accounts/self/users?search_term={searchterm}"
    response = fetch(u)
    for R in response:
        R['email'] = getEmail(R['id'])
    #print(json.dumps(response, indent=2))
    return response
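# A minimal usage sketch for search_user; 'gburce' is an illustrative search
# term, and the printed fields assume the Canvas user dicts used elsewhere in
# this file. Not called anywhere by default.
def _example_search_user():
    hits = search_user('gburce')
    for h in hits:
        print(h['id'], h['name'], h['email'])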
def search_and_select_user(searchterm):
    candidates = search_user(searchterm)
    if len(candidates) == 0:
        return 0
    if len(candidates) == 1:
        return candidates[0]
    for i, c in enumerate(candidates):
        print(f" {i+1}: {c['name']} \t {c['sis_user_id']} \t {c['email']} \t {c['created_at']}")
    choice = int(input('which user (0 for none)? '))
    if choice == 0:
        return 0
    return candidates[choice-1]
def find_unnamed_people():
    if 0:
        # one-off: build an allusers-by-goo cache from a dated snapshot
        suffix = "_20220907"
        ilearn_users = json.loads(codecs.open(f'cache/allusers{suffix}.json', 'r', 'utf-8').read())
        ilu_by_goo = {}
        for U in ilearn_users:
            if 'sis_user_id' in U and U['sis_user_id']:
                g = U['sis_user_id'][3:]
                ilu_by_goo[g] = U
        #print(json.dumps(ilu_by_goo, indent=2))
        outfile = codecs.open(f'cache/allusers_by_goo{suffix}.json', 'w', 'utf-8')
        outfile.write(json.dumps(ilu_by_goo, indent=2))
    # get conf_users from the flex day site
    url = "http://hhh.gavilan.edu/phowell/dir/api2.php?query=users"  # shadows the imported Canvas url within this function
    all_users = json.loads(requests.get(url, verify=False).content)
    unfixed = []
    unfound_goos = []
    for A in all_users['data']:
        if A['name'] == "":
            found_name = "*"
            record = user_from_goo(A['goo'])
            if record and 'name' in record:
                found_name = record['name']
            desc = f"Goo: {A['goo']}\t email: {A['email']} \t new name: {found_name}"
            if found_name != '*':
                print(f"UPDATE conf_users SET name='{found_name}' WHERE goo='{A['goo']}';")
            else:
                unfixed.append(desc)
                unfound_goos.append(A['goo'])
    print()
    queries = []
    for i, g in enumerate(unfound_goos):
        print(g)
        choice = search_and_select_user(g)
        if choice != 0:
            qry = f"UPDATE conf_users SET name='{choice['name']}' WHERE goo='{g}';"
            queries.append(qry)
    print()
    for Q in queries:
        print(Q)
import pandas as pd
# Function to generate SQL INSERT statements
def generate_insert_statements(table_name='conf_sessions'):
    # Read the CSV into a pandas DataFrame
    df = pd.read_csv('cache/flexsessions.csv')
    # Drop the columns 'date' and 'Start' (case-sensitive)
    df = df.drop(columns=['date', 'Start'], errors='ignore')
    # Drop any columns that are unnamed (such as 'Unnamed: 8')
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    insert_statements = []
    # Build one INSERT ... SET statement per row, escaping single quotes in values
    for index, row in df.iterrows():
        set_clause = ", ".join(["`{}`='{}'".format(col, str(value).replace("'", "''")) for col, value in row.items()])
        insert_statement = f"INSERT INTO {table_name} SET {set_clause};"
        insert_statements.append(insert_statement)
    for S in insert_statements:
        print(S)
    return insert_statements
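# The emitted statements take this shape (column names hypothetical; they come
# from whatever headers cache/flexsessions.csv actually has):
#   INSERT INTO conf_sessions SET `title`='Opening Session', `room`='HU 101';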
def cross_check_users():
    r"""
    Reads:
      - ilearn_staff.json (list of Canvas users; uses fields: sis_user_id, name, email, login_id)
      - conf_users.csv (existing rows; headers include: goo)
    Writes:
      - conf_users_inserts.sql with INSERTs for users missing from conf_users
    Assumptions:
      - "goo" is sis_user_id with the leading 'G00' removed (e.g., G00138124 -> 138124).
      - Skip deactivated users (name contains "(Deactivated)" or login_id == "deactivated").
      - Skip users whose sis_user_id doesn't match G00\d+.
      - New rows default to active=0 and p2id=NULL.
    """
    STAFF_JSON = "cache/ilearn_staff.json"
    CONF_CSV = "cache/conf_users.csv"
    OUT_SQL = "cache/conf_users_inserts.sql"
    TABLE = "conf_users"
    # ---- Load existing goo set from conf_users.csv ----
    existing_goos = set()
    with open(CONF_CSV, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            s = (row.get("goo") or "").strip()
            if s.isdigit():
                existing_goos.add(int(s))
    # ---- Load Canvas staff ----
    with open(STAFF_JSON, "r", encoding="utf-8") as f:
        users = json.load(f)
    def esc(s):
        if s is None:
            return "NULL"
        return "'" + s.replace("'", "''") + "'"
    inserts = []
    scanned = 0
    skipped_deactivated = 0
    skipped_bad_sis = 0
    missing = 0
    for u in users:
        scanned += 1
        name = (u.get("name") or "").strip()
        login = (u.get("login_id") or "").strip()
        sis = (u.get("sis_user_id") or "").strip()
        # Skip non-G00-form sis_user_id
        m = re.fullmatch(r"G00(\d+)", sis)
        if not m:
            skipped_bad_sis += 1
            continue
        # Skip deactivated (currently disabled)
        #if "(deactivated)" in name.lower() or login.lower() == "deactivated":
        #    skipped_deactivated += 1
        #    continue
        goo = int(m.group(1))
        if goo in existing_goos:
            continue
        email = (u.get("email") or "").strip() or None
        sql = (
            f"INSERT INTO `{TABLE}` (goo, email, name, active, p2id) "
            f"VALUES ({goo}, {esc(email)}, {esc(name)}, 0, NULL);"
        )
        inserts.append(sql)
        missing += 1
    with open(OUT_SQL, "w", encoding="utf-8") as f:
        f.write("-- Generated INSERTs for missing conf_users rows\n")
        f.write("\n".join(inserts))
        f.write("\n")
    print(f"Wrote {missing} INSERTs to {OUT_SQL}")
    print(f"Scanned: {scanned} | Existing goos: {len(existing_goos)} | Skipped deactivated: {skipped_deactivated} | Skipped bad sis: {skipped_bad_sis}")
if __name__ == "__main__":
    print("")
    options = { 1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2],
                2: ['generate sql to fix conf_user dups', correct_dup_user_rows],
                3: ['add names to new accounts', find_unnamed_people],
                4: ['search for user', search_user],
                5: ['generate insert statements', generate_insert_statements],
                6: ['cross check users', cross_check_users],
              }
    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the selected function from the options dict
    options[int(resp)][1]()
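# Usage sketch: pass a task number directly, or run with no args for the menu.
#   python flexday.py 2    # print the SQL that fixes conf_users dups
#   python flexday.py      # interactive menu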