"""Maintenance utilities for the conf_users / flex-day directory tables.

Most functions here do not touch the database directly: they print SQL
statements to stdout for a human to review and paste into a SQL client.
"""

import codecs
import csv
import json
import re
import sys

import funcy
import requests

from localcache2 import user_from_goo
from canvas_secrets import url
from pipelines import fetch
from users import getEmail


def _sql_escape(value):
    """Return *value* as a string with single quotes doubled.

    Needed so generated SQL doesn't break on names like O'Brien. This is
    escaping for human-reviewed statements, not a substitute for real
    parameterized queries.
    """
    return str(value).replace("'", "''")


def user_db_sync():
    """Print INSERT statements for iLearn staff missing from conf_users.

    Compares the cached iLearn staff list against the conf_users rows
    currently in the database (via the directory API) and emits one INSERT
    per missing @gavilan.edu address.
    """
    # currently in db
    conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
    conusr_emails = set(x.lower() for x in funcy.pluck('email', conusr))

    # fetch all staff from ilearn: ILRN unique emails
    with codecs.open("cache/ilearn_staff.json", "r", "utf-8") as f:
        ilrn = json.loads(f.read())
    ilrn_emails = set(x.lower() for x in funcy.pluck('email', ilrn))

    for e in ilrn_emails:
        if e not in conusr_emails and e.endswith('@gavilan.edu'):
            E = funcy.first(funcy.where(ilrn, email=e))
            goo = E['login_id'][3:]  # strip the leading "G00"
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');"
                  % (_sql_escape(goo), _sql_escape(e), _sql_escape(E['short_name'])))


def user_db_sync2():
    """Cross-reference personnel dir, iLearn staff, and conf_users.

    Writes cache/db_staff_report.csv marking which of the three sources
    contains each email, and prints INSERT statements for iLearn staff
    missing from conf_users (goo minus the G00 prefix, email, and name).
    """
    # fetch all personnel dir entries from dir_api.php: PERSL unique emails
    persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
    persl_emails = set(x.lower() for x in funcy.pluck('email', persl))

    # fetch all staff from ilearn: ILRN unique emails
    with codecs.open("cache/ilearn_staff.json", "r", "utf-8") as f:
        ilrn = json.loads(f.read())
    ilrn_emails = set(x.lower() for x in funcy.pluck('email', ilrn))

    # fetch all conf_users from dir_api.php: CONUSR unique emails
    conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
    conusr_emails = set(x.lower() for x in funcy.pluck('email', conusr))

    # fetch all gavi_personnel_ext from dir_api.php: GPEREXT
    # must have column 'personnel' or 'c_users' or both.
    # NOTE(review): fetched but not used below -- kept for its side effects,
    # if any; confirm whether it can be dropped.
    gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")

    all_emails = sorted(persl_emails | ilrn_emails | conusr_emails)

    with codecs.open('cache/db_staff_report.csv', 'w', 'utf-8') as fout:
        fout.write('email,personnel_dir,ilearn,conf_user\n')
        for e in all_emails:
            if e in ilrn_emails and e not in conusr_emails and e.endswith('@gavilan.edu'):
                E = funcy.first(funcy.where(ilrn, email=e))
                goo = E['login_id'][3:]  # goo (minus G00), email, and name go into conf_users
                print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');"
                      % (_sql_escape(goo), _sql_escape(e), _sql_escape(E['short_name'])))
            # One row per email; no trailing comma so rows match the 4-column header.
            fout.write('%s,%d,%d,%d\n' % (
                e,
                1 if e in persl_emails else 0,
                1 if e in ilrn_emails else 0,
                1 if e in conusr_emails else 0,
            ))
    print('done')


def get_best_user_record(rec_list):
    """Pick the canonical record from a list of duplicate conf_users rows.

    Rule: the lowest id wins, unless that id is in the exceptions table, in
    which case the mapped replacement id from rec_list wins instead.
    """
    # key should be replaced with value; these don't follow the lowest-id rule.
    exceptions = {
        120: 883,    # Gary Burce
        538: 955,    # Ronna de benedetti
        127: 957,    # Mia Cabello
        802: 963,    # binh vo
        1053: 963,
        923: 971,    # brianna aguilar
        933: 970,    # elif konus
        473: 879,    # tania maheu
    }
    # sort records by id; the lowest is the default winner
    s_recs = sorted(rec_list, key=sort_id)
    preferred = s_recs[0]
    # check for exceptions
    if int(preferred['id']) in exceptions:
        new_preferred_id = exceptions[int(preferred['id'])]
        for r in rec_list:
            if int(r['id']) == new_preferred_id:
                preferred = r
                break
    return preferred


# Get dup rows like this:
#   SELECT * FROM conf_users
#   WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
#   ORDER BY goo;
def correct_dup_user_rows():
    '''
    Fixing the bad conf_users rows because the intranet1 SSO started changing
    how it returned the accounts:
    - email is either with @gavilan.edu or without, or with @my.gavilan.edu
    - but goo is correct

    1. change login functions to look up GOO in conf_users
       - still add new row if not present
    2. Find dups
       a. get lowest id (L), that is the correct one
       b. for higher id (H), replace H with L in:
          conf_signups.user, conf_answers.user, conf_hosts.host,
          conf_logs <- abandoned
          gavi_logs <- can't really
    3. AND make a big overview page or report for all users/all years so I can
       check that records are complete
       - person
       - year or semester (conferences table)
       - their signups, hostings
       - their 'attended' and/or comments
    '''
    fname = 'cache/conf_users_dups.csv'
    with open(fname, 'r') as f:
        reader = csv.DictReader(f)
        data = list(reader)

    # group the exported dup rows by goo; each group is one real person
    pairs = funcy.group_by(lambda r: r['goo'], data)
    counter = 0
    for goo, recs in pairs.items():
        if goo == "0":
            continue  # skip fake user
        counter += 1
        s_recs = sorted(recs, key=sort_id)
        preferred = get_best_user_record(s_recs)
        s_recs.remove(preferred)
        # Repoint every non-preferred record's rows at the preferred id,
        # then delete the duplicate conf_users row.
        for NP in s_recs:
            print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
            print(f"DELETE FROM conf_users WHERE id={NP['id']};")


def sort_id(a):
    """Sort key: a record's 'id' field as an int."""
    return int(a['id'])


def search_user(searchterm=''):
    """Search Canvas accounts for *searchterm*; prompt if none given.

    Returns the matching user records with an 'email' key added to each.
    '''
    """
    if not searchterm:
        searchterm = input('search term: ')
    u = url + f"/api/v1/accounts/self/users?search_term={searchterm}"
    response = fetch(u)
    for R in response:
        R['email'] = getEmail(R['id'])
    return response


def search_and_select_user(searchterm):
    """Interactive wrapper around search_user().

    Returns the single match, a user chosen from a printed menu, or 0 when
    there are no matches or the operator declines.
    """
    candidates = search_user(searchterm)
    if len(candidates) == 0:
        return 0
    if len(candidates) == 1:
        return candidates[0]
    for i, c in enumerate(candidates):
        print(f" {i+1}: {c['name']} \t {c['sis_user_id']} \t {c['email']} \t {c['created_at']}")
    choice = int(input('which user (0 for none)? '))
    if choice == 0:
        return 0
    return candidates[choice - 1]


def find_unnamed_people():
    """Print UPDATE statements filling in blank names in conf_users.

    Each blank-name account is looked up by goo in the local cache; anything
    still unresolved gets an interactive Canvas search at the end.
    """
    if 0:
        # one-off: rebuild the by-goo lookup from a dated allusers dump
        suffix = "_20220907"
        with codecs.open(f'cache/allusers{suffix}.json', 'r', 'utf-8') as f:
            ilearn_users = json.loads(f.read())
        ilu_by_goo = {}
        for U in ilearn_users:
            if 'sis_user_id' in U and U['sis_user_id']:
                g = U['sis_user_id'][3:]  # strip "G00"
                ilu_by_goo[g] = U
        with codecs.open(f'cache/allusers_by_goo{suffix}.json', 'w', 'utf-8') as outfile:
            outfile.write(json.dumps(ilu_by_goo, indent=2))

    # get conf_users from flex day site
    # SECURITY: verify=False disables TLS certificate checking; acceptable only
    # because this is an internal host -- confirm before reusing elsewhere.
    # (renamed from `url` so we don't shadow the canvas_secrets.url import
    # that search_user() relies on)
    api_url = "http://hhh.gavilan.edu/phowell/dir/api2.php?query=users"
    all_users = json.loads(requests.get(api_url, verify=False).content)

    unfixed = []
    unfound_goos = []
    for A in all_users['data']:
        if A['name'] == "":
            found_name = "*"
            record = user_from_goo(A['goo'])
            if record and 'name' in record:
                found_name = record['name']
            desc = f"Goo: {A['goo']}\t email: {A['email']} \t new name: {found_name}"
            if found_name != '*':
                print(f"UPDATE conf_users SET name='{_sql_escape(found_name)}' WHERE goo='{A['goo']}';")
            else:
                unfixed.append(desc)
                unfound_goos.append(A['goo'])
    print()

    # fall back to an interactive Canvas search for the leftovers
    queries = []
    for i, g in enumerate(unfound_goos):
        print(g)
        choice = search_and_select_user(g)
        if choice != 0:
            queries.append(f"UPDATE conf_users SET name='{_sql_escape(choice['name'])}' WHERE goo='{g}';")
    print()
    for Q in queries:
        print(Q)


def generate_insert_statements(table_name='conf_sessions'):
    """Read cache/flexsessions.csv and print INSERT ... SET statements.

    Args:
        table_name: target table for the generated statements.

    Returns:
        The list of generated statement strings (also printed).
    """
    # local import: pandas is only needed by this one menu option
    import pandas as pd

    df = pd.read_csv('cache/flexsessions.csv')
    # Drop the columns 'date' and 'Start' (case-sensitive)
    df = df.drop(columns=['date', 'Start'], errors='ignore')
    # Drop any columns that are unnamed (such as 'Unnamed: 8')
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

    insert_statements = []
    for index, row in df.iterrows():
        # Escape single quotes in values
        set_clause = ", ".join(
            "`{}`='{}'".format(col, str(value).replace("'", "''"))
            for col, value in row.items()
        )
        # INSERT ... SET form keeps each column/value pair together
        insert_statements.append(f"INSERT INTO {table_name} SET {set_clause};")

    for S in insert_statements:
        print(S)
    return insert_statements


if __name__ == "__main__":
    print("")
    options = {
        1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2],
        2: ['generate sql to fix conf_user dups', correct_dup_user_rows],
        3: ['add names to new accounts', find_unnamed_people],
        4: ['search for user', search_user],
        5: ['generate insert statements', generate_insert_statements],
    }
    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the function in the options dict
    options[int(resp)][1]()