# canvasapp/flexday.py

import funcy, codecs, json, sys, csv, re, requests
from localcache2 import user_from_goo
from canvas_secrets import url
from pipelines import fetch
from users import getEmail
import util
def user_db_sync():
    # currently in db
    conusr = fetch("http://deep1:8080/dir_api.php?users=1")
    conusr_emails = set([x.lower() for x in funcy.pluck('email', conusr)])
    # fetch all staff from iLearn; ILRN unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set([x.lower() for x in funcy.pluck('email', ilrn)])
    for e in ilrn_emails:
        if e not in conusr_emails and e.endswith('@gavilan.edu'):
            E = funcy.first(funcy.where(ilrn, email=e))
            goo = E['login_id'][3:]
            #print("not in conf_user: %s \t %s \t %s" % (e, E['short_name'], E['login_id']))
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))
# No longer relevant because we don't use the personnel table anymore.
def user_db_sync2():
    # fetch all personnel dir entries from dir_api.php; PERSL unique emails
    persl = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnel=1")
    persl_emails = set([x.lower() for x in funcy.pluck('email', persl)])
    # fetch all staff from iLearn; ILRN unique emails
    ilrn = json.loads(codecs.open("cache/ilearn_staff.json", "r", "utf-8").read())
    ilrn_emails = set([x.lower() for x in funcy.pluck('email', ilrn)])
    # fetch all conf_users from dir_api.php; CONUSR unique emails
    conusr = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?users=1")
    conusr_emails = set([x.lower() for x in funcy.pluck('email', conusr)])
    # fetch all gavi_personnel_ext from dir_api.php; GPEREXT must have column 'personnel' or 'c_users' or both
    gperext = fetch("http://hhh.gavilan.edu/phowell/map/dir_api.php?personnelext=1")
    all_emails = sorted(persl_emails | ilrn_emails | conusr_emails)
    fout = codecs.open('cache/db_staff_report.csv', 'w', 'utf-8')
    fout.write('email,personnel_dir,ilearn,conf_user\n')
    for e in all_emails:
        if e in ilrn_emails and e not in conusr_emails and e.endswith('@gavilan.edu'):
            E = funcy.first(funcy.where(ilrn, email=e))
            # goo (minus the 'G00' prefix), email, and name go into conf_users
            goo = E['login_id'][3:]
            print("INSERT INTO conf_users (goo,email,name) VALUES ('%s', '%s', '%s');" % (goo, e, E['short_name']))
        fout.write(e + ',')
        fout.write('1,' if e in persl_emails else '0,')
        fout.write('1,' if e in ilrn_emails else '0,')
        fout.write('1' if e in conusr_emails else '0')
        fout.write('\n')
    fout.close()
    #print(json.dumps([persl, ilrn, conusr, gperext], indent=2))
    print('done')
def get_best_user_record(rec_list):
    # Rule: the record with the lowest id is used, unless that id appears in
    # the exceptions map below. For those, the key id is replaced by the
    # value id; they don't follow the typical lowest-id rule.
    exceptions = { 120: 883,    # Gary Burce
                   538: 955,    # Ronna de Benedetti
                   127: 957,    # Mia Cabello
                   802: 963,    # Binh Vo
                   1053: 963,
                   923: 971,    # Brianna Aguilar
                   933: 970,    # Elif Konus
                   473: 879,    # Tania Maheu
                 }
    # sort records by id
    s_recs = sorted(rec_list, key=sort_id)
    preferred = s_recs[0]
    # check for exceptions
    if int(preferred['id']) in exceptions:
        new_preferred_id = exceptions[int(preferred['id'])]
        for r in rec_list:
            if int(r['id']) == new_preferred_id:
                preferred = r
                break
    return preferred
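# A minimal usage sketch for get_best_user_record. The record dicts are
# hypothetical but mirror the conf_users columns used above (id, goo, email).
# Not called anywhere by default.
def _example_get_best_user_record():
    recs = [{'id': '120', 'goo': '138124', 'email': 'gburce@gavilan.edu'},
            {'id': '883', 'goo': '138124', 'email': 'gburce@my.gavilan.edu'}]
    best = get_best_user_record(recs)
    print(best['id'])  # 120 sorts lowest, but the exceptions map redirects 120 -> 883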
# Get dup rows like this:
# SELECT * FROM conf_users
# WHERE goo IN ( SELECT goo FROM conf_users GROUP BY goo HAVING COUNT(*) >= 2 )
# ORDER BY goo;
def correct_dup_user_rows():
    '''
    Fix the bad conf_users rows created when the intranet1 SSO started
    changing how it returned accounts:
    - email comes back with @gavilan.edu, with no domain, or with @my.gavilan.edu
    - but goo is correct
    1. change login functions to look up GOO in conf_users
       - still add a new row if not present
    2. Find dups
       a. get the lowest id (L); that is the correct one
       b. for each higher id (H), replace H with L in: conf_signups.user,
          conf_answers.user, conf_hosts.host, conf_logs <- abandoned gavi_logs <-- can't really
    3. AND make a big overview page or report for all users/all years so I can
       check that records are complete
       - person
       - year or semester (conferences table)
       - their signups, hostings
       - their 'attended' and/or comments
    '''
    fname = 'cache/conf_users_dups.csv'
    with open(fname, 'r') as f:
        reader = csv.DictReader(f)
        data = list(reader)
    pairs = funcy.group_by(lambda r: r['goo'], data)
    counter = 0
    for goo, recs in pairs.items():
        if goo == "0":
            continue  # skip the fake user
        counter += 1
        s_recs = sorted(recs, key=sort_id)
        preferred = get_best_user_record(s_recs)
        # debug listing of the group, with the preferred record starred:
        #for i, rec in enumerate(s_recs):
        #    col1 = " * " if rec == preferred else "   "
        #    print(f"-- {col1} \t {rec['id']} \t {rec['goo']} \t {rec['email']} \t {rec['name']}")
        s_recs.remove(preferred)
        # Loop through the non-preferred records and repoint/delete their rows
        for NP in s_recs:
            print(f"UPDATE conf_signups SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_answers SET user={preferred['id']} WHERE user={NP['id']};")
            print(f"UPDATE conf_hosts SET host={preferred['id']} WHERE host={NP['id']};")
            print(f"DELETE FROM conf_users WHERE id={NP['id']};")
    # SELECT * FROM conf_answers WHERE user=1142
    # SELECT * FROM conf_hosts WHERE host=1142
    #print(f"Total dups: {counter}")
def sort_id(a):
    return int(a['id'])
def search_user(searchterm=''):
    if not searchterm:
        searchterm = input('search term: ')
    u = url + f"/api/v1/accounts/self/users?search_term={searchterm}"
    response = fetch(u)
    for R in response:
        R['email'] = getEmail(R['id'])
    #print(json.dumps(response, indent=2))
    return response
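# A minimal usage sketch for search_user; 'gburce' is an illustrative search
# term, and the printed fields assume the Canvas user dicts used elsewhere in
# this file. Not called anywhere by default.
def _example_search_user():
    hits = search_user('gburce')
    for h in hits:
        print(h['id'], h['name'], h['email'])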
def search_and_select_user(searchterm):
    candidates = search_user(searchterm)
    if len(candidates) == 0:
        return 0
    if len(candidates) == 1:
        return candidates[0]
    for i, c in enumerate(candidates):
        print(f" {i+1}: {c['name']} \t {c['sis_user_id']} \t {c['email']} \t {c['created_at']}")
    choice = int(input('which user (0 for none)? '))
    if choice == 0:
        return 0
    return candidates[choice-1]
def find_unnamed_people():
    if 0:
        # one-off: build an allusers-by-goo cache from a dated snapshot
        suffix = "_20220907"
        ilearn_users = json.loads(codecs.open(f'cache/allusers{suffix}.json', 'r', 'utf-8').read())
        ilu_by_goo = {}
        for U in ilearn_users:
            if 'sis_user_id' in U and U['sis_user_id']:
                g = U['sis_user_id'][3:]
                ilu_by_goo[g] = U
        #print(json.dumps(ilu_by_goo, indent=2))
        outfile = codecs.open(f'cache/allusers_by_goo{suffix}.json', 'w', 'utf-8')
        outfile.write(json.dumps(ilu_by_goo, indent=2))
    # get conf_users from the flex day site
    url = "http://hhh.gavilan.edu/phowell/dir/api2.php?query=users"  # shadows the imported Canvas url within this function
    all_users = json.loads(requests.get(url, verify=False).content)
    unfixed = []
    unfound_goos = []
    for A in all_users['data']:
        if A['name'] == "":
            found_name = "*"
            record = user_from_goo(A['goo'])
            if record and 'name' in record:
                found_name = record['name']
            desc = f"Goo: {A['goo']}\t email: {A['email']} \t new name: {found_name}"
            if found_name != '*':
                print(f"UPDATE conf_users SET name='{found_name}' WHERE goo='{A['goo']}';")
            else:
                unfixed.append(desc)
                unfound_goos.append(A['goo'])
    print()
    queries = []
    for i, g in enumerate(unfound_goos):
        print(g)
        choice = search_and_select_user(g)
        if choice != 0:
            qry = f"UPDATE conf_users SET name='{choice['name']}' WHERE goo='{g}';"
            queries.append(qry)
    print()
    for Q in queries:
        print(Q)
import pandas as pd
# Function to generate SQL INSERT statements
def generate_insert_statements(table_name='conf_sessions'):
    # Read the CSV into a pandas DataFrame
    df = pd.read_csv('cache/flexsessions.csv')
    # Drop the columns 'date' and 'Start' (case-sensitive)
    df = df.drop(columns=['date', 'Start'], errors='ignore')
    # Drop any columns that are unnamed (such as 'Unnamed: 8')
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    insert_statements = []
    # Build one INSERT ... SET statement per row, escaping single quotes in values
    for index, row in df.iterrows():
        set_clause = ", ".join(["`{}`='{}'".format(col, str(value).replace("'", "''")) for col, value in row.items()])
        insert_statement = f"INSERT INTO {table_name} SET {set_clause};"
        insert_statements.append(insert_statement)
    for S in insert_statements:
        print(S)
    return insert_statements
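# The emitted statements take this shape (column names hypothetical; they come
# from whatever headers cache/flexsessions.csv actually has):
#   INSERT INTO conf_sessions SET `title`='Opening Session', `room`='HU 101';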
def cross_check_users():
    r"""
    Reads:
      - ilearn_staff.json (list of Canvas users; uses fields: sis_user_id, name, email, login_id)
      - conf_users.csv (existing rows; headers include: goo)
    Writes:
      - conf_users_inserts.sql with INSERTs for users missing from conf_users
    Assumptions:
      - "goo" is sis_user_id with the leading 'G00' removed (e.g., G00138124 -> 138124).
      - Skip deactivated users (name contains "(Deactivated)" or login_id == "deactivated").
      - Skip users whose sis_user_id doesn't match G00\d+.
      - New rows default to active=0 and p2id=NULL.
    """
    STAFF_JSON = "cache/ilearn_staff.json"
    CONF_CSV = "cache/conf_users.csv"
    OUT_SQL = "cache/conf_users_inserts.sql"
    TABLE = "conf_users"
    # ---- Load existing goo set from conf_users.csv ----
    existing_goos = set()
    with open(CONF_CSV, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            s = (row.get("goo") or "").strip()
            if s.isdigit():
                existing_goos.add(int(s))
    # ---- Load Canvas staff ----
    with open(STAFF_JSON, "r", encoding="utf-8") as f:
        users = json.load(f)
    def esc(s):
        if s is None:
            return "NULL"
        return "'" + s.replace("'", "''") + "'"
    inserts = []
    scanned = 0
    skipped_deactivated = 0
    skipped_bad_sis = 0
    missing = 0
    for u in users:
        scanned += 1
        name = (u.get("name") or "").strip()
        login = (u.get("login_id") or "").strip()
        sis = (u.get("sis_user_id") or "").strip()
        # Skip non-G00-form sis_user_id
        m = re.fullmatch(r"G00(\d+)", sis)
        if not m:
            skipped_bad_sis += 1
            continue
        # Skip deactivated (currently disabled)
        #if "(deactivated)" in name.lower() or login.lower() == "deactivated":
        #    skipped_deactivated += 1
        #    continue
        goo = int(m.group(1))
        if goo in existing_goos:
            continue
        email = (u.get("email") or "").strip() or None
        sql = (
            f"INSERT INTO `{TABLE}` (goo, email, name, active, p2id) "
            f"VALUES ({goo}, {esc(email)}, {esc(name)}, 0, NULL);"
        )
        inserts.append(sql)
        missing += 1
    with open(OUT_SQL, "w", encoding="utf-8") as f:
        f.write("-- Generated INSERTs for missing conf_users rows\n")
        f.write("\n".join(inserts))
        f.write("\n")
    print(f"Wrote {missing} INSERTs to {OUT_SQL}")
    print(f"Scanned: {scanned} | Existing goos: {len(existing_goos)} | Skipped deactivated: {skipped_deactivated} | Skipped bad sis: {skipped_bad_sis}")
if __name__ == "__main__":
    print("")
    options = { 1: ['(old) sync conf_user and iLearn employee tables', user_db_sync2],
                2: ['generate sql to fix conf_user dups', correct_dup_user_rows],
                3: ['add names to new accounts', find_unnamed_people],
                4: ['search for user', search_user],
                5: ['generate insert statements', generate_insert_statements],
                6: ['cross check users', cross_check_users],
              }
    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the selected function from the options dict
    options[int(resp)][1]()
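# Usage sketch: pass a task number directly, or run with no args for the menu.
#   python flexday.py 2    # print the SQL that fixes conf_users dups
#   python flexday.py      # interactive menu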