import funcy, codecs, json, sys, csv, re, requests
import pandas as pd

from localcache2 import user_from_goo
from canvas_secrets import url
from pipelines import fetch
from users import getEmail


def user_db_sync():
    # conf_users rows currently in the directory db
    conusr = fetch("http://deep1:8080/dir_api.php?users=1")
    conusr_emails = set(x.lower() for x in funcy.pluck('email', conusr) if x)

    # fetch all staff from ilearn (ILRN); unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set(x.lower() for x in funcy.pluck('email', ilrn) if x)

    for e in ilrn_emails:
        if e not in conusr_emails and e.endswith('@gavilan.edu'):
            # case-insensitive lookup: e was lowercased above, so a plain
            # funcy.where(ilrn, email=e) would miss mixed-case emails
            E = funcy.first(r for r in ilrn if (r.get('email') or '').lower() == e)
            goo = E['login_id'][3:]   # strip the leading 'G00'
            #print("not in conf_user: %s \t %s \t %s" % (e, E['short_name'], E['login_id']))
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))
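
# Example of the SQL emitted above (values illustrative, not real rows):
#   INSERT INTO conf_users (goo,email,name) VALUES ('138124', 'jdoe@gavilan.edu', 'Jane Doe');
# Caveat: values are interpolated without escaping, so a short_name containing
# a single quote would break the statement (see the esc() helper in
# cross_check_users below for the escaped version).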


# No longer relevant because we don't use the personnel table anymore.
def user_db_sync2():
    # fetch all personnel dir entries from dir_api.php (PERSL); unique emails
    persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
    persl_emails = set(x.lower() for x in funcy.pluck('email', persl) if x)
    #persl_ids = set(x.lower() for x in funcy.pluck('email', persl))

    # fetch all staff from ilearn (ILRN); unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set(x.lower() for x in funcy.pluck('email', ilrn) if x)

    # fetch all conf_users from dir_api.php (CONUSR); unique emails
    conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
    conusr_emails = set(x.lower() for x in funcy.pluck('email', conusr) if x)

    # fetch all gavi_personnel_ext rows from dir_api.php (GPEREXT); must have
    # column 'personnel' or 'c_users' or both. (Fetched for reference; unused below.)
    gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")

    all_emails = sorted(persl_emails | ilrn_emails | conusr_emails)

    fout = codecs.open('cache/db_staff_report.csv', 'w', 'utf-8')
    fout.write('email,personnel_dir,ilearn,conf_user\n')
    for e in all_emails:
        if e in ilrn_emails and e not in conusr_emails and e.endswith('@gavilan.edu'):
            # case-insensitive lookup: e was lowercased above
            E = funcy.first(r for r in ilrn if (r.get('email') or '').lower() == e)
            # goo (minus the 'G00' prefix), email, and name go into conf_users
            goo = E['login_id'][3:]
            #print("not in conf_user: %s \t %s \t %s" % (e, E['short_name'], E['login_id']))
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))

        # one report row per email: 1/0 flags for each source
        fout.write(e + ',')
        fout.write('1,' if e in persl_emails else '0,')
        fout.write('1,' if e in ilrn_emails else '0,')
        fout.write('1,' if e in conusr_emails else '0,')
        fout.write('\n')
    fout.close()

    #print(json.dumps([persl, ilrn, conusr, gperext], indent=2))
    print('done')
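
# The report written to cache/db_staff_report.csv looks like this
# (the header line is real; the data row is illustrative):
#   email,personnel_dir,ilearn,conf_user
#   jdoe@gavilan.edu,1,1,0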


def get_best_user_record(rec_list):
    # Rule: the record with the lowest id is used, unless that id is in the
    # list of exceptions below.

    # Key should be replaced by value; these don't follow the typical
    # lowest-id rule.
    exceptions = { 120: 883,     # Gary Burce
                   538: 955,     # Ronna de Benedetti
                   127: 957,     # Mia Cabello
                   802: 963,     # Binh Vo
                   1053: 963,
                   923: 971,     # Brianna Aguilar
                   933: 970,     # Elif Konus
                   473: 879,     # Tania Maheu
                 }

    # sort records by id; the lowest is the default
    s_recs = sorted(rec_list, key=sort_id)
    preferred = s_recs[0]

    # check for exceptions
    if int(preferred['id']) in exceptions:
        new_preferred_id = exceptions[int(preferred['id'])]
        for r in rec_list:
            if int(r['id']) == new_preferred_id:
                preferred = r
                break

    return preferred
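
# Example: if a duplicate set has ids 120 and 883, the lowest id (120) would
# normally win, but the exceptions map redirects 120 -> 883 (Gary Burce), so
# the id-883 record is returned instead.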


# Get dup rows like this:
#   SELECT * FROM conf_users
#   WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
#   ORDER BY goo;


def correct_dup_user_rows():
    '''
    Fix the bad conf_users rows created when the intranet1 SSO started
    changing how it returned accounts:
    - email comes back with @gavilan.edu, with no domain, or with @my.gavilan.edu
    - but goo is correct

    1. Change the login functions to look up GOO in conf_users
       - still add a new row if not present
    2. Find dups
       a. get the lowest id (L); that is the correct one
       b. for each higher id (H), replace H with L in: conf_signups.user,
          conf_answers.user, conf_hosts.host, conf_logs <- abandoned,
          gavi_logs <- can't really
    3. AND make a big overview page or report for all users/all years so I can
       check that records are complete
       - person
       - year or semester (conferences table)
       - their signups, hostings
       - their 'attended' and/or comments
    '''
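    # Expected shape of cache/conf_users_dups.csv (assumed from the field
    # lookups below; produced by the duplicate-rows SELECT above; rows
    # illustrative):
    #   id,goo,email,name
    #   883,138124,jdoe@gavilan.edu,Jane Doe
    #   1142,138124,jdoe@my.gavilan.edu,Jane Doe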
    fname = 'cache/conf_users_dups.csv'
    with open(fname, 'r') as f:
        reader = csv.DictReader(f)
        data = list(reader)
    #print(data)
    pairs = funcy.group_by(lambda r: r['goo'], data)
    #print(json.dumps(pairs, indent=2))

    counter = 0

    for goo, recs in pairs.items():
        if goo == "0":
            continue   # skip fake user
        counter += 1

        #emails = funcy.pluck('email', recs)
        #print(list(emails))

        #ids = funcy.pluck('id', recs)
        #print(list(ids))

        s_recs = sorted(recs, key=sort_id)
        preferred = get_best_user_record(s_recs)

        if 1:   # debug listing: star the preferred record in each dup set
            for i, rec in enumerate(s_recs):
                col1 = "   "
                if rec == preferred: col1 = " * "
                #print(f"-- {col1} \t {rec['id']} \t {rec['goo']} \t {rec['email']} \t {rec['name']}")

        s_recs.remove(preferred)

        # Now loop through the non-preferred records, repoint the tables that
        # reference them, and delete the duplicate row
        for NP in s_recs:
            #print(f"I want to remove conf_user id {NP['id']}")
            print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
            print(f"DELETE FROM conf_users WHERE id={NP['id']};")
            # spot-check: SELECT * FROM conf_answers WHERE user=1142
            #             SELECT * FROM conf_hosts WHERE host=1142

        #print(f"{s_recs[0]['email']} - lowest id: {s_recs[0]['id']} - {len(s_recs)} records")
        #print()

    #print(f"Total dups: {counter}")


def sort_id(a):
    # sort key: numeric conf_users id (ids come out of the CSV as strings)
    return int(a['id'])


def search_user(searchterm=''):
    if not searchterm:
        searchterm = input('search term: ')
    u = url + f"/api/v1/accounts/self/users?search_term={searchterm}"
    response = fetch(u)
    for R in response:
        R['email'] = getEmail(R['id'])
    #print(json.dumps(response, indent=2))
    return response


def search_and_select_user(searchterm):
    candidates = search_user(searchterm)
    if len(candidates) == 0: return 0
    if len(candidates) == 1: return candidates[0]

    for i, c in enumerate(candidates):
        print(f" {i+1}: {c['name']} \t {c['sis_user_id']} \t {c['email']} \t {c['created_at']}")
    choice = int(input('which user (0 for none)? '))
    if choice == 0: return 0
    return candidates[choice-1]
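
# Typical use (illustrative): find_unnamed_people() below calls
# search_and_select_user(goo); when Canvas returns multiple matches the user
# is prompted to pick one, and 0 means no match / none chosen.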


def find_unnamed_people():

    if 0:   # one-time step: index cached Canvas users by goo
        suffix = "_20220907"
        ilearn_users = json.loads(codecs.open(f'cache/allusers{suffix}.json', 'r', 'utf-8').read())
        ilu_by_goo = {}
        for U in ilearn_users:
            if 'sis_user_id' in U and U['sis_user_id']:
                g = U['sis_user_id']
                g = g[3:]   # strip the leading 'G00'
                ilu_by_goo[g] = U
        #print(json.dumps(ilu_by_goo, indent=2))
        outfile = codecs.open(f'cache/allusers_by_goo{suffix}.json', 'w', 'utf-8')
        outfile.write(json.dumps(ilu_by_goo, indent=2))

    # get conf_users from the flex day site (named dir_url so it doesn't
    # shadow the Canvas url imported from canvas_secrets)
    dir_url = "http://hhh.gavilan.edu/phowell/dir/api2.php?query=users"
    all_users = json.loads(requests.get(dir_url, verify=False).content)
    unfixed = []
    unfound_goos = []
    for A in all_users['data']:
        if A['name'] == "":
            found_name = "*"
            record = user_from_goo(A['goo'])
            if record and 'name' in record:
                #print(record)
                #if A['goo'] in ilu_by_goo:
                #    found_name = ilu_by_goo[A['goo']]['name']
                found_name = record['name']
            desc = f"Goo: {A['goo']}\t email: {A['email']} \t new name: {found_name}"
            if found_name != '*':
                print(f"UPDATE conf_users SET name='{found_name}' WHERE goo='{A['goo']}';")
            else:
                unfixed.append(desc)
                unfound_goos.append(A['goo'])
    print()

    # for goos we couldn't resolve locally, search Canvas interactively
    queries = []
    for i, g in enumerate(unfound_goos):
        print(g)
        choice = search_and_select_user(g)
        if choice != 0:
            qry = f"UPDATE conf_users SET name='{choice['name']}' WHERE goo='{g}';"
            queries.append(qry)
    print()
    for Q in queries:
        print(Q)


# Generate SQL INSERT statements for conf_sessions from a CSV export.
def generate_insert_statements(table_name='conf_sessions'):
    # Read the CSV into a pandas DataFrame
    df = pd.read_csv('cache/flexsessions.csv')

    # Drop the columns 'date' and 'Start' (case-sensitive)
    df = df.drop(columns=['date', 'Start'], errors='ignore')

    # Drop any columns that are unnamed (such as 'Unnamed: 8')
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

    insert_statements = []

    # Iterate over each row in the DataFrame
    for index, row in df.iterrows():
        # Standard-SQL form, kept for reference:
        #columns = ", ".join(df.columns)
        #values = ", ".join("'{0}'".format(str(value).replace("'", "''")) for value in row.values)
        #insert_statement = f"INSERT INTO {table_name} ({columns}) VALUES ({values});"
        #insert_statements.append(insert_statement)

        # Escape single quotes in values and build one `col`='value' pair per column
        set_clause = ", ".join("`{}`='{}'".format(col, str(value).replace("'", "''")) for col, value in row.items())

        # Construct the SQL INSERT statement using SET
        insert_statement = f"INSERT INTO {table_name} SET {set_clause};"
        insert_statements.append(insert_statement)

    for S in insert_statements:
        print(S)
    return insert_statements
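
# Example emitted statement (illustrative; real column names come from the
# CSV header):
#   INSERT INTO conf_sessions SET `title`='What''s New in Canvas', `room`='HU101';
# The "INSERT ... SET" form is MySQL-specific; the commented-out
# (columns) VALUES (...) form above is the standard-SQL equivalent.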


def cross_check_users():
    r"""
    Reads:
      - ilearn_staff.json (list of Canvas users; uses fields: sis_user_id, name, email, login_id)
      - conf_users.csv (existing rows; headers include: goo)
    Writes:
      - conf_users_inserts.sql with INSERTs for users missing from conf_users

    Assumptions:
      - "goo" is sis_user_id with the leading 'G00' removed (e.g., G00138124 -> 138124).
      - Skip deactivated users (name contains "(Deactivated)" or login_id == "deactivated").
      - Skip users whose sis_user_id doesn't match G00\d+.
      - New rows default to active=0 and p2id=NULL.
    """

    STAFF_JSON = "cache/ilearn_staff.json"
    CONF_CSV = "cache/conf_users.csv"
    OUT_SQL = "cache/conf_users_inserts.sql"
    TABLE = "conf_users"

    # ---- Load existing goo set from conf_users.csv ----
    existing_goos = set()
    with open(CONF_CSV, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            s = (row.get("goo") or "").strip()
            if s.isdigit():
                existing_goos.add(int(s))

    # ---- Load Canvas staff ----
    with open(STAFF_JSON, "r", encoding="utf-8") as f:
        users = json.load(f)

    def esc(s):
        # SQL-quote a string, doubling embedded single quotes; None -> NULL
        if s is None:
            return "NULL"
        return "'" + s.replace("'", "''") + "'"

    inserts = []
    scanned = 0
    skipped_deactivated = 0
    skipped_bad_sis = 0
    missing = 0

    for u in users:
        scanned += 1
        name = (u.get("name") or "").strip()
        login = (u.get("login_id") or "").strip()
        sis = (u.get("sis_user_id") or "").strip()

        # Skip non G00-form sis_user_id
        m = re.fullmatch(r"G00(\d+)", sis)
        if not m:
            skipped_bad_sis += 1
            continue

        # Skip deactivated
        #low_name = name.lower()
        #if "(deactivated)" in low_name or login.lower() == "deactivated":
        #    skipped_deactivated += 1
        #    continue

        goo = int(m.group(1))
        if goo in existing_goos:
            continue

        email = (u.get("email") or "").strip() or None

        sql = (
            f"INSERT INTO `{TABLE}` (goo, email, name, active, p2id) "
            f"VALUES ({goo}, {esc(email)}, {esc(name)}, 0, NULL);"
        )
        inserts.append(sql)
        missing += 1

    with open(OUT_SQL, "w", encoding="utf-8") as f:
        f.write("-- Generated INSERTs for missing conf_users rows\n")
        f.write("\n".join(inserts))
        f.write("\n")

    print(f"Wrote {missing} INSERTs to {OUT_SQL}")
    print(f"Scanned: {scanned} | Existing goos: {len(existing_goos)} | Skipped deactivated: {skipped_deactivated} | Skipped bad sis: {skipped_bad_sis}")
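
# Example line written to conf_users_inserts.sql (illustrative):
#   INSERT INTO `conf_users` (goo, email, name, active, p2id) VALUES (138124, 'jdoe@gavilan.edu', 'Jane Doe', 0, NULL);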


if __name__ == "__main__":
    print("")
    options = { 1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2],
                2: ['generate sql to fix conf_user dups', correct_dup_user_rows],
                3: ['add names to new accounts', find_unnamed_people],
                4: ['search for user', search_user],
                5: ['generate insert statements', generate_insert_statements],
                6: ['cross check users', cross_check_users],
              }

    # fullmatch so a stray argument like "3x" falls through to the menu
    # instead of crashing int()
    if len(sys.argv) > 1 and re.fullmatch(r'\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    # Call the chosen function from the options dict
    options[int(resp)][1]()