"""Helpers for reconciling the conf_users table with iLearn / personnel data.

The functions here only *print* SQL (or write a CSV report); nothing is
executed against a database from this module.

NOTE(review): ``fetch`` is called below but is neither defined nor imported
in the visible part of this file. It is presumably a helper that GETs a URL
and returns the decoded JSON (a list of dicts) -- confirm where it comes from.
"""

import codecs
import csv
import json
import re
import sys

import funcy

ILEARN_STAFF_CACHE = "cache/ilearn_staff.json"
STAFF_REPORT_CSV = 'cache/db_staff_report.csv'
CONF_USERS_DUPS_CSV = 'cache/conf_users_dups.csv'


def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that names such as O'Brien do not
    terminate the literal early in the generated statements.
    """
    return "'" + str(value).replace("'", "''") + "'"


def _index_by_email(records):
    """Map lower-cased email -> first record carrying that email.

    Records without an email (missing key, None or empty string) are skipped.
    Assumes each record is a dict -- true for the JSON cache; presumed for
    whatever ``fetch`` returns.
    """
    index = {}
    for rec in records:
        email = rec.get('email')
        if email:
            # setdefault keeps the first record seen for a given address.
            index.setdefault(email.lower(), rec)
    return index


def _load_ilearn_staff():
    """Load the cached iLearn staff list, closing the file afterwards."""
    with codecs.open(ILEARN_STAFF_CACHE, "r", "utf-8") as fin:
        return json.load(fin)


def _conf_user_insert_sql(email, staff_rec):
    """Build the INSERT statement that adds *staff_rec* to conf_users."""
    # goo is the login id minus its first three characters (the "G00" prefix).
    goo = staff_rec['login_id'][3:]
    return "INSERT INTO conf_users (goo,email,name) VALUES (%s, %s, %s);" % (
        _sql_quote(goo),
        _sql_quote(email),
        _sql_quote(staff_rec['short_name']),
    )


def user_db_sync():
    """Print INSERTs for @gavilan.edu iLearn staff missing from conf_users.

    Emails are compared case-insensitively. Statements are printed in the
    order the staff appear in the iLearn cache file.
    """
    # currently in db
    conusr = fetch("http://192.168.1.6:8080/dir_api.php?users=1")
    conusr_emails = set(_index_by_email(conusr))

    # all staff from iLearn, keyed by lower-cased email
    ilrn_by_email = _index_by_email(_load_ilearn_staff())

    for email, staff_rec in ilrn_by_email.items():
        if email not in conusr_emails and email.endswith('@gavilan.edu'):
            print(_conf_user_insert_sql(email, staff_rec))


def user_db_sync2():
    """Write a per-email presence report and print missing conf_users INSERTs.

    The report (cache/db_staff_report.csv) has one row per distinct email
    found in the personnel directory, iLearn staff cache or conf_users, with
    1/0 flags for each source. For @gavilan.edu iLearn staff that are absent
    from conf_users, the corresponding INSERT statement is printed as well.
    """
    # personnel directory entries
    persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
    persl_emails = set(_index_by_email(persl))

    # all staff from iLearn, keyed by lower-cased email
    ilrn_by_email = _index_by_email(_load_ilearn_staff())

    # conf_users rows
    conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
    conusr_emails = set(_index_by_email(conusr))

    # gavi_personnel_ext: must have column 'personnel' or 'c_users' or both.
    # NOTE(review): fetched but not used by the report yet; the call is kept
    # because the behavior of ``fetch`` is not visible from here.
    gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")

    all_emails = sorted(persl_emails | set(ilrn_by_email) | conusr_emails)

    with codecs.open(STAFF_REPORT_CSV, 'w', 'utf-8') as fout:
        # csv.writer quotes awkward values and emits exactly four fields per
        # row, matching the header (no trailing comma).
        writer = csv.writer(fout, lineterminator='\n')
        writer.writerow(['email', 'personnel_dir', 'ilearn', 'conf_user'])
        for email in all_emails:
            in_ilearn = email in ilrn_by_email
            in_conf_users = email in conusr_emails

            if in_ilearn and not in_conf_users and email.endswith('@gavilan.edu'):
                # goo (minus G00), email and name go into conf_users
                print(_conf_user_insert_sql(email, ilrn_by_email[email]))

            writer.writerow([
                email,
                int(email in persl_emails),
                int(in_ilearn),
                int(in_conf_users),
            ])

    print('done')


def get_best_user_record(rec_list):
    """Pick the conf_users record to keep from a group of duplicates.

    The rule is that the lowest id wins, unless that id is listed in the
    exceptions table, in which case the record with the mapped id is used
    (if it is present in *rec_list*; otherwise the lowest id is kept).

    rec_list: non-empty list of dict rows with an int-convertible 'id'.
    Returns one of the dicts from rec_list.
    """
    # key should be replaced with value. These ones don't follow the typical
    # lowest id rule.
    exceptions = {
        120: 883,   # Gary Burce
        538: 955,   # Ronna de benedetti
        127: 957,   # Mia Cabello
        802: 963,   # binh vo
        1053: 963,
        923: 971,   # brianna aguilar
        933: 970,   # elif konus
        473: 879,   # tania maheu
    }

    preferred = min(rec_list, key=sort_id)

    override_id = exceptions.get(sort_id(preferred))
    if override_id is not None:
        for rec in rec_list:
            if sort_id(rec) == override_id:
                return rec
    return preferred


# Get dup rows like this:
#   SELECT * FROM conf_users
#   WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
#   ORDER BY goo;

def correct_dup_user_rows():
    '''
    Print UPDATE statements that repoint duplicate conf_users rows.

    Fixing the bad conf_users rows because the intranet1 SSO started changing
    how it returned the accounts:
      - email is either with @gavilan.edu or without, or with @my.gavilan.edu
      - but goo is correct

    1. change login functions to look up GOO in conf_users
       - still add new row if not present

    2. Find dups
       a. get lowest id (L), that is the correct one
       b. for higher id (H), replace H with L in:
          conf_signups.user, conf_answers.user, conf_hosts.host,
          conf_logs  <- abandoned
          gavi_logs  <-- can't really

    3. AND make a big overview page or report for all users/all years so I
       can check that records are complete
       - person
       - year or semester (conferences table)
       - their signups, hostings
       - their 'attended' and/or comments

    Input is cache/conf_users_dups.csv (output of the duplicate query shown
    above this function), which must have 'id' and 'goo' columns.
    '''
    # newline='' is what the csv module expects for files it reads.
    with open(CONF_USERS_DUPS_CSV, 'r', newline='') as f:
        data = list(csv.DictReader(f))

    pairs = funcy.group_by(lambda r: r['goo'], data)

    for goo, recs in pairs.items():
        if goo == "0":
            continue  # skip fake user

        s_recs = sorted(recs, key=sort_id)
        preferred = get_best_user_record(s_recs)
        keep_id = sort_id(preferred)

        # Repoint every non-preferred record's references at the kept id.
        for rec in s_recs:
            if rec is preferred:
                continue
            drop_id = sort_id(rec)
            print(f"UPDATE conf_signups SET user={keep_id} WHERE user={drop_id};")
            print(f"UPDATE conf_answers SET user={keep_id} WHERE user={drop_id};")
            print(f"UPDATE conf_hosts SET host={keep_id} WHERE host={drop_id};")


def sort_id(a):
    """Sort key: the record's 'id' field as an int."""
    return int(a['id'])


def main():
    """Run one of the maintenance tasks, chosen by argv[1] or interactively."""
    print("")
    options = {
        1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2],
        2: ['generate sql to fix conf_user dups', correct_dup_user_rows],
    }

    # fullmatch: an argument such as "1abc" falls through to the menu instead
    # of crashing in int().
    if len(sys.argv) > 1 and re.fullmatch(r'\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        if resp not in options:
            sys.exit("Unknown option: %s" % resp)
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        answer = input('Choose: ')
        try:
            resp = int(answer)
        except ValueError:
            sys.exit("Not a number: %r" % answer)
        if resp not in options:
            sys.exit("Unknown option: %s" % resp)

    # Call the function in the options dict
    options[resp][1]()


if __name__ == "__main__":
    main()