from importlib import reload
import json, codecs, requests, re, pdb, csv, textdistance, collections
import sys, csv, string, funcy, math, shutil, os
import pytz, time
import pandas as pd
import matplotlib.pyplot as plt
#from pandas import TimeGrouper
from PIL import Image
from collections import defaultdict

from pipelines import fetch, fetch_stream, header, url, FetchError, put_file
from schedules import get_semester_schedule
from courses import course_enrollment, users_in_semester
from localcache import users_this_semester_db, unwanted_req_paths, timeblock_24hr_from_dt, dt_from_24hr_timeblock
from localcache import teachers_courses_semester, course_mode, sem_schedule
from localcache2 import all_2x_sem_courses_teachers, all_sem_courses_teachers
from schedules import campus_dept_hierarchy
#from pipelines import dean, dean_names #TODO
from util import dept_from_name, most_common_item
from os.path import exists, getmtime
from localcache2 import refresh_semester_schedule_db
from canvas_secrets import url

from dateutil import parser
from datetime import datetime as dt
from datetime import timedelta
import datetime
import queue
from threading import Thread
from os import path

# for NLP
#import spacy
from gensim import corpora, models, similarities, downloader, utils
from nltk import stem

# todo: these constants
#last_4_semesters = 'fall2020 summer2020 spring2020 fall2019'.split(' ')
#last_4_semesters_ids = [62, 60, 61, 25]
last_4_semesters = 'spring2021 fall2020 summer2020 spring2020'.split(' ')
last_4_semesters_ids = [168, 65, 64, 62]

log_default_startdate = "2021-08-23T00:00:00-07:00"
lds_stamp = parser.parse(log_default_startdate)
recvd_date = '2023-01-01T00:00:00Z'

num_threads = 25
max_log_count = 500000

##########
##########
##########   GETTING USER DATA
##########
##########

# All users to a cache file cache/allusers.json
def fetchAllUsers():
    if exists('cache/allusers.json'):
        time = date_time = dt.fromtimestamp( getmtime('cache/allusers.json') )
        newname = 'cache/allusers_' + time.strftime('%Y%m%d') + ".json"
        print("renaming old data file to %s" % newname)
        os.rename('cache/allusers.json', newname)
    out1 = codecs.open('cache/allusers.json','w','utf-8')
    out2 = codecs.open('cache/allusers_ids.json','w','utf-8')
    all_u = fetch_stream(url + '/api/v1/accounts/1/users?per_page=100', 1)
    ids = []
    main_list = []
    for this_fetch in all_u:
        for U in this_fetch:
            ids.append(U['id'])
            main_list.append(U)
    ids.sort()
    out2.write( json.dumps(ids, indent=2))
    out1.write( json.dumps(main_list, indent=2))
    out2.close()
    out1.close()
    return ids

##########
##########
##########   TEACHERS LIST AND LOCAL USERS FILE
##########
##########

# Fetch teacher users objects from local cache
def teacherRolesCache():
    # I used to be load_users
    users_raw = json.load(open('cache/ilearn_staff.json','r'))
    users = {}
    users_by_id = {}
    for U in users_raw:
        users[ U['login_id'] ] = U
        users_by_id[ U['id'] ] = U
    return users, users_by_id

# Outputs: cache/ilearn_staff.json
# Canvas: Fetch all people with gavilan.edu email address
def teacherRolesUpdateCache():
    # I used to be get_users
    t = fetch('/api/v1/accounts/1/users?per_page=500&search_term=%40gavilan.edu&include[]=email')
    g = open('cache/ilearn_staff.json','w')
    g.write( json.dumps(t) )
    g.close()
    #put_file('/gavilan.edu/staff/flex/2020/','cache/','ilearn_staff.json')
    print("Wrote to 'cache/ilearn_staff.json'")
    return teacherRolesCache()
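# Example (sketch) of how the two lookup dicts returned by teacherRolesCache()
# are typically used together. Assumes cache/ilearn_staff.json already exists
# (run teacherRolesUpdateCache() first); the sample login_id is made up.
def _example_teacher_lookup():
    users_by_login, users_by_id = teacherRolesCache()
    # look up by Canvas login (G number), then cross-check by numeric Canvas id
    rec = users_by_login.get('g00123456')   # hypothetical login_id
    same = users_by_id.get(rec['id']) if rec else None
    if rec:
        print(rec['name'], rec.get('email', ''))
    return rec, same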
# Fetch preferred email address for a given user id. ( Canvas )
def getEmail(user_id):
    results = fetch("/api/v1/users/" + str(user_id) + "/communication_channels")
    for r in results:
        if r['type']=='email':
            return r['address']
    return ''

##########
##########
##########   TEACHERS AND OTHER STAFF
##########
##########
#
# Gather all my info, CRM style, in the folder teacherdata
#
#
# Typical actions: For everyone with a teacher role:
#   - What are the courses they taught for the last X semesters?
#   - What's their activity level each semester?
#   - Which of those courses are Online, Hybrid or Face2face?
#     + column for each semester: OHLOHL
#   - How many online classes have they taught in the past?
#   - Are they brand new, or brand new online?
# further...
#   - what's their department?
#   - what's their badges and 'tech level?'
#   -

# All teachers in a particular course
def getAllTeachers(course_id=59):
    # returns a list of teacher user objects, paging through results
    # the same way fetchAllUsers() does
    qry = '/api/v1/courses/' + str(course_id) + '/search_users?enrollment_type=teacher&per_page=100'
    teachers = []
    for page in fetch_stream(url + qry, 1):
        teachers.extend(page)
    return teachers

#
def classType(t):
    if t == 'lecture': return 'L'
    if t == 'online': return 'O'
    if t == 'hours': return 'R'
    if t == 'lab': return 'A'
    if t == 'hybrid': return 'H'
    # default when no type is present
    # todo: fix bug in schedule parser so non-online classes have a type field
    return 'L'

def my_blank_string():
    return "no data"

def my_blank_dict():
    return {'name':'NoName','email':'noemail@gavilan.edu'}

def my_empty_dict():
    return defaultdict(my_blank_string)

def get_email_from_rec(name,name_to_record):
    #print "Looking up: " + name
    try:
        return name_to_record[name]['email']
    except Exception as e:
        print("Missing Teacher %s" % name)
        return 'noemail@gavilan.edu'

# Pull the staff directory on the webpage. Convert to pandas dataframe
def staff_dir(get_fresh=False):
    """
    if get_fresh:
        url = "http://www.gavilan.edu/staff/dir.php"
        regex = "var\slist=(\[.*\]);"
        response = requests.get(url).text
        m = re.search(regex,response)
        if m:
            output = '{"staff":' + m.group(1) + '}'
            of = open('cache/teacherdata/staff_dir.json','w')
            of.write(output)
            js = json.loads(output)
            df = pd.DataFrame(js['staff'])
            return df
            print("Wrote cache/teacherdata/staff_dir.json")
        else:
            print("Failed on staff directory scrape")
            return ''
    else:
        input = json.loads(open('cache/teacherdata/staff_dir.json','r').read())
        df = pd.DataFrame(input['staff'])
        return df
    """
    # TODO lol get fresh again...
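    # Shape note (assumption, inferred from the column indexing below): each row
    # of cache/dir_corrections.csv appears to be keyed on the old department
    # string and to carry three replacement fields, roughly
    #
    #   old dept label , corrected dept , secondary dept , title
    #   "CSIS/Digital Media" , "CSIS" , "DM" , "Instructor"     (made-up example)
    #
    # dept1_crxn / dept2_crxn / title_crxn each map the old label to one of
    # those columns, and the non-empty pieces are joined with '/'.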
    old_dir = csv.reader(open('cache/personnel2020_04_12.csv'), delimiter=',')
    dept1_crxn = {r[0]:r[1] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    dept2_crxn = {r[0]:r[2] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    title_crxn = {r[0]:r[3] for r in csv.reader(open('cache/dir_corrections.csv'), delimiter=',') }
    revised_dir = [ ]
    columns = next(old_dir)
    for r in old_dir:
        old_dept = r[2]
        if old_dept in dept1_crxn:
            new_one = dept1_crxn[old_dept]
            if dept2_crxn[old_dept]:
                new_one += '/' + dept2_crxn[old_dept]
            if title_crxn[old_dept]:
                new_one += '/' + title_crxn[old_dept]
            r[2] = new_one
        revised_dir.append(r)
    print(revised_dir)
    return pd.DataFrame(revised_dir,columns=columns)

#
#
#
#
###
###   TEACHER CRM FUNCTIONS
###
#

def schedForTeacherOverview(long,short):
    sem = get_semester_schedule(short)
    sem['type'] = sem['type'].apply(classType)
    #sem['code'] = sem[['code','type']].apply(' '.join,axis=1)
    sem['sem'] = short
    sem = sem.drop(['time','loc','name','date','days'],axis=1) # ,'crn'
    return sem

# Return a dataframe of the last 4 semester schedules put together
def oneYearSchedule():
    sp19 = schedForTeacherOverview('2019spring','sp19')
    su19 = schedForTeacherOverview('2019summer','su19')
    fa19 = schedForTeacherOverview('2019fall','fa19')
    sp20 = schedForTeacherOverview('2020spring','sp20')
    # The four-semester schedule
    a = pd.concat([sp19,su19,fa19,sp20], sort=True, ignore_index=True)
    a = a.drop(['cap','cmp','extra','rem','sec','cred','act'], axis=1)
    a.to_csv('cache/one_year_schedule.csv')
    return a

def num_sections_last_year(line):
    #if not type(line)=='str': return 0
    parts = line.split(' ')
    return len(parts)

def sec_type_stats(line):
    #print(type(line))
    #if not type(line)=='str': return {'fail':1}
    #print("in sts: " + str(line))
    parts = line.split(' ')
    output = defaultdict(int)
    for p in parts:
        output[p] += 1
    return output

def prct_online(line):
    d = sec_type_stats(line)
    #print(d)
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'O':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

def prct_lecture(line):
    #print(line)
    d = sec_type_stats(line)
    #if 'fail' in d: return 0
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'L':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

def prct_hybrid(line):
    d = sec_type_stats(line)
    #if 'fail' in d: return 0
    total = 0
    my_total = 0
    for k,v in d.items():
        total += v
        if k == 'H':
            my_total += v
    return int(100 * ((1.0)*my_total / total))

# Given the names of teachers in last year's schedules, fill in email, etc.
from ilearn files def teacher_basic_info(sched, from_ilearn, names): bi = from_ilearn # pd.DataFrame(from_ilearn) bi.rename(columns={'id':'canvasid','login_id':'goo'}, inplace=True) # bi.drop(['name',],axis=1,inplace=True) #print(bi) #input('xx') sp20 = schedForTeacherOverview('2020spring','sp20') codes_sp20 = sp20.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) crns_sp20 = sp20.groupby('teacher')['crn'].apply( lambda x: ' '.join( map( str, funcy.distinct(x))) ) codes_sp20.rename(columns={'code':'sp20code'}, inplace=True) codes_sp20.to_csv('cache/trash/codes_sp20.csv',header=True) crns_sp20.rename(columns={'crn':'sp20crn'}, inplace=True) crns_sp20.to_csv('cache/trash/crns_sp20.csv',header=True) a = sched.groupby('teacher')['code'].apply( lambda x: ' '.join(funcy.distinct(x)) ) a = pd.DataFrame(a) a.reset_index(inplace=True) a['dept'] = a.apply(guessDept,axis=1) print(a) def find_that_name(x): #print(x) if 'teacher' in x: return names(x['teacher']) #print('name not found?') return '' a['ilearn_name'] = a.apply( find_that_name, axis=1) a.rename(columns={'code':'courses'}, inplace=True) #print(type(a)) a.reset_index(inplace=True) a = pd.merge(a,codes_sp20.rename('sp20courses'), on='teacher') a = pd.merge(a,crns_sp20.rename('sp20crns'), on='teacher') a.to_csv('cache/trash/sched_w_sp20.csv',header=True) print(a) a['canvasid'] = a['teacher'].map(names) #print(a) c = pd.merge(bi, a, left_on='name', right_on='ilearn_name', how='outer') c.to_csv('cache/trash/basic.csv',header=True) #print(c) return c # TODO Old and broken # what percentage of their sections were online / hybrid /lecture ? # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/teacher_by_semester.csv, def teacherModalityHistory(sched=[],names=[]): if not len(sched): sched = oneYearSchedule() #names = match_username() # How many classes a teacher taught lect/online/hybrid/hours sec_type = sched.groupby(['teacher','sem'])['type'].apply(' '.join) sec_type.to_csv('cache/teacherdata/teacher_by_semester.csv',header=True) ## THIS IS THE LIST of how many ## lecture, hybrid, online they've taught #sec_type = pd.read_csv('cache/teacherdata/teacher_by_semester.csv') sec_grp = sec_type.groupby('teacher').aggregate( ' '.join ) #sec_grp.to_csv('cache/trash/sec_grp_3.csv',header=True) #sec_grp = sec_grp.iloc[1:] ## I'm seeing bad items on the first 2 #sec_grp.drop(index='teacher') #sec_grp.to_csv('cache/trash/sec_grp_0.csv',header=True) # sec_grp = pd.DataFrame(sec_grp) #print(type(sec_grp)) sec_grp['prct_online'] = sec_grp['type'].map(prct_online) sec_grp['prct_lecture'] = sec_grp['type'].map(prct_lecture) sec_grp['prct_hybrid'] = sec_grp['type'].map(prct_hybrid) sec_grp['num_sections_last_year'] = sec_grp['type'].map(num_sections_last_year) sec_grp.drop('type',axis=1,inplace=True) sec_grp.reset_index(inplace=True) sec_grp.to_csv('cache/teacherdata/modality_history.csv') return sec_grp def teacherCourseHistory(a,names): pass # actually not using this. 
moved to _basic_info # YEEEAH sched = a.groupby(['teacher','code']) #for name,group in sched: # print(name) #print(sched.count()) return a['name'] = a.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) a['email'] = a.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) a.sort_values(by=['dept','teacher','codenum'],inplace=True) a = a.drop(['teacher'],axis=1) a.to_csv('cache/teacherdata/courses_taught.csv') return a """ d = a.groupby(['teacher']) # ,'dept','codenum','codeletter' out1 = open('teacherdata/courses_taught.csv','w') by_dept = {} # x todo: sort by dept also for name, group in d: #print name if re.search(r'^\d+',name) or name=='TBA': print("Skipping weird name: ", name) continue rec = {'email':'xx'} try: rec = records_by_sname[name] #print rec except Exception as e: print("Missing Teacher %s" % name) continue out1.write(name+"\t"+rec['email']) s = set() #print group for idx,r in group.iterrows(): s.add( str(r[1]) + str(r[2]) + str(r[3])) for clas in sorted(s): d = dept_from_name(clas) if d in by_dept: if name in by_dept[d]: by_dept[d][name].append(clas) else: by_dept[d][name] = [ clas, ] else: by_dept[d] = { name: [ clas, ] } out1.write("\n\t"+str(clas)) out1.write("\n") out1.write( json.dumps(by_dept,indent=2))""" # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/course_teacher_combos.csv, def teacherSharedCourses(a=[]): if not len(a): a = oneYearSchedule() # List of classes. Group by teacher/format. Shows who has historically # taught a class and who teaches it most often. c = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' c = c.groupby(['dept','codenum','codeletter']) #,'teacher' c = c.aggregate(lambda x: set(x)) c.to_csv('teacherdata/course_teacher_combos.csv') ## THIS is the list of teachers who ## share courses return c # TODO: this is broken # Consumes: output/semesters/fa19_sched.json and etc for 1 year # Outputs: cache/num_courses_per_dept.csv (not teacher_course_oer_deptcount) # How many courses in each department were taught in the last year? 
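# Quick illustration (sketch) of the groupby / aggregate-to-set pattern that
# teacherSharedCourses() above relies on to show which teachers have
# historically shared a course. Toy data only; the column names mirror the
# schedule dataframe used in this module.
def _example_shared_courses():
    toy = pd.DataFrame({
        'dept':       ['CSIS','CSIS','MATH'],
        'codenum':    [85, 85, 233],
        'codeletter': ['', '', ''],
        'teacher':    ['Smith, A', 'Jones, B', 'Smith, A'],
    })
    grouped = toy.groupby(['dept','codenum','codeletter']).aggregate(lambda x: set(x))
    # CSIS 85 -> {'Smith, A', 'Jones, B'}   MATH 233 -> {'Smith, A'}
    return grouped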
def departmentCountCourses(a=[]): if not len(a): a = oneYearSchedule() tt = a.drop(['code','partofday','sem','site','type'],axis=1) #,'dept','codeletter' """records_by_sname = defaultdict(my_empty_dict, match_usernames()) tt.drop_duplicates(keep='first',inplace=True) tt['name'] = tt.apply(lambda x: records_by_sname[x['teacher']]['name'],axis=1) tt['email'] = tt.apply(lambda x: records_by_sname[x['teacher']]['email'],axis=1) tt = tt.drop(['teacher'],axis=1) tt.sort_values(by=['dept','name','codenum'],inplace=True) count = tt['dept'].value_counts() count.to_csv('cache/num_courses_per_dept.csv', header=True)""" def clean_nonprint(s): return re.sub(f'[^{re.escape(string.printable)}]', '', s) def read_cmte(names): output = [] out2 = defaultdict(list) input = codecs.open('cache/teacherdata/committees_2018_2019.csv','r','utf-8') with input as csvfile: cmtereader = csv.reader(csvfile, delimiter=',', quotechar='"') for row in cmtereader: for R in row: R = R.strip() R = clean_nonprint(R) (fname,lname,cmtes) = row a = re.split(",\s*",cmtes) if len(a)>1: cmtes = a else: cmtes = a name1 = lname + ", " + fname name2 = fname + " " + lname name = name1 realname = names(name1) if not realname: realname = names(name2) name = name2 if realname: for cmm in cmtes: output.append( [realname, cmm] ) out2[realname].append(cmm) else: print("committee participant name failed: %s / %s:\t%s" % (name1,name2,str(a))) print(type(name1)) #print(out2) return output,out2 def read_training_records(): myinput = open('cache/teacherdata/more_2018_2019_training_attendance.txt','r').readlines() current_sesh = "" ppl_in_sesh = {} all_ppl = set() for L in myinput: L = L.strip() if L: if L.startswith('#'): ma = re.search(r'^\#\s(.*)$',L) if ma: current_sesh = ma.group(1) else: print("-- read_training_records: Couldn't find training set? " + L) else: if current_sesh in ppl_in_sesh: ppl_in_sesh[current_sesh].append(L) else: ppl_in_sesh[current_sesh] = [ L, ] all_ppl.add(L) if 0: print(ppl_in_sesh) print(all_ppl) # Want to pivot the dict, so key is a name, value is another dict, where k2 is session name, v2 is Y/N d_of_d = defaultdict(dict) for k,v in ppl_in_sesh.items(): for user in v: d_of_d[user][k] = 'Y' return d_of_d # open a file and mark the people with their ids given. Return a dataframe def read_bootcamp1(filename): a = pd.read_csv(filename) #print(a) b = a.loc[:, ['canvas_id','grade','last_activity']] b.rename(columns={'canvas_id':'bc1canvasid','grade':'bootcamp_grade','last_activity':'bootcamp_date'}, inplace=True) #print(b) return b # open a file and mark the people with their ids given. Return a dataframe def read_bootcamp2(filename): a = pd.read_csv(filename) #print(a) b = a.loc[:, ['canvas_id','grade','last_activity']] b.rename(columns={'canvas_id':'bc2canvasid','grade':'bootcamp_progress','last_activity':'bootcamp_date'}, inplace=True) #print(b) return b def not_blank_or_pound(L): if L.startswith("#"): return False L = L.strip() if L == "": return False return True def temp1(x): #print(x[1]) return x[1] def add_realnames(df,names): # the surveys. 
raw name is in 2nd column df['ilearn_name'] = df.apply( lambda x: names(temp1(x),1), axis=1) return df def compareToughNames(a,b): # search for a in b m = re.search(a, b) if m: return True return False def compareNames(a,b,verbose=0): if a == b: return True cnDBG = 0 try: parts_a = [ W.lower() for W in re.split("[\s,]", a) ] [ x.strip() for x in parts_a ] parts_b = [ W.lower() for W in re.split("[\s,]", b) ] [ x.strip() for x in parts_b ] pa2 = sorted([ parts_a[0], parts_a[-1] ]) pb2 = sorted([ parts_b[0], parts_b[-1] ]) if pa2 == pb2: if cnDBG: print("->Match: %s, %s" % (a,b)) return True if pa2[0] == pb2[0] or pa2[-1] == pb2[-1]: if cnDBG: print("--->Near match: %s" % b) return False except Exception as e: #print("Problem with compareNames %s , %s" % (a,b)) #print(e) return False if len(pa2[0])>3 and len(pb2[0])>3: if pa2[0][0] == pb2[0][0]: if pa2[0][1] == pb2[0][1]: if pa2[0][2] == pb2[0][2]: if cnDBG: print("===> Near match (first 3): %s, %s, %s, %s" % (a, b, pa2[0], pb2[0])) pass b = b.lower() a = a.lower() #if verbose: print("searching: %s / %s" % (a,b)) if re.search( b, a): #print("REGEX MATCH: %s | %s" % (a,b)) return True if re.search( a, b): #print("REGEX MATCH: %s | %s" % (a,b)) return True return False def find_ilearn_record(ilearn_records,manual_records, othername,verbose=0): # manual records are ('name':'canvas_id') #print(ilearn_records) if not othername: return "" if type(othername) == type(1.25): return "" #if math.isnan(othername): return False if othername in manual_records: a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[othername]) )) if a: return a['name'] for x in ilearn_records: #print('f_i_r') #print(othername) #print(x) if compareNames(othername,x['name'],verbose): return x['name'] for k,v in manual_records.items(): #print(k) #print(othername) #print(type(othername)) b = re.search( k, othername) if b: a = funcy.first( funcy.where( ilearn_records, id=int(manual_records[k]) )) if a: return a['name'] return "" def manualNamesAndDept(): # copied from // getTeachersInfoMain .... schedule_one_yr = oneYearSchedule() from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) manual_names = manualNames() names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn, names_lookup) # till here # the staff directory dr = staff_dir(False) print(dr) print(dr.columns) print( dr['department'].unique() ) # now to reconcile and combine these.... # # we want: # - alternate names of academic / other depts, with one preferred # - some people are PT Fac, FT Fac, Director, assistant, spec, and some titles are unknown. # - sometimes the hierarchy is of departments, and sometimes of people. try not to confuse that. 
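# Usage sketch for the name-matching helpers above: find_ilearn_record() is
# normally curried with funcy.partial (exactly as getTeachersInfoMain() does
# below) so it can be passed around as a one-argument lookup. Filenames are
# the real ones used elsewhere in this module; the example name is made up.
def _example_names_lookup():
    from_ilearn = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read())
    manual_names = manualNames()
    names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names)
    # returns the canonical iLearn name, or "" when no match is found
    return names_lookup('Smith, Alice')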
# # eventually, want to get pics or other info from other sources too, o365, cranium cafe, etc # def manualNames(): mm = dict([ x.strip().split(',') for x in \ open('cache/teacherdata/teacher_manual_name_lookup.csv','r').readlines()]) mz = {} for k,v in mm.items(): mz[k] = v mz[k.lower()] = v parts = k.split(" ") if len(parts)==2: mz[ parts[1] + ", " + parts[0] ] = v mz[ parts[1] + "," + parts[0] ] = v #print(mz) return mz # given a list of class codes, return the most common (academic) department def guessDept(d_list): li = str(d_list.code).split(" ") count = defaultdict(int) #print(str(d_list.code)) for i in li: m = re.search(r'^([A-Z]+)$',i) if m: count[m.group(1)] += 1 mmax = 0 max_L = '' for k,v in count.items(): #print(" %s:%i, " % (k,v), end='') if v > mmax: mmax = v max_L = k print("") return max_L """ # Faculty Info Plans bootcamp_active.csv Started bootcamp. Remind them to finish it? bootcamp_passed.csv Badge'd for BC. Online and Hybrid teachers not on this list need reminding. courses_taught.csv x course_teacher_combos.csv Teachers who share the teaching of a course. Courses in common. emails_deans+chairs.txt Just a email list FA2017 Faculty Survey.csv Look at answers for video, helpful formats, and comments faculty_main_info.csv Has percentage mix of a teachers' online/hybrid/lecture history historical_shells_used.json x SP2019 Faculty Survey.csv Look at rate tech skills, topics interested in, would add video, and comments committees 2018 2019.csv Committees people serve on. Not so useful: teacher_by_semester.csv precursor to faculty_main_info. Has semesters separated. """ # # # # Call all the teacher info / CRM gathering stuff # Make one big csv file of everything I know about a teacher def getTeachersInfoMain(): schedule_one_yr = oneYearSchedule() #print(schedule_one_yr) #if input('q to quit ')=='q': return # comes from teacherRolesUpdateCache ... 
search for @gavilan.edu in email address from_ilearn = list( map( lambda y: funcy.select_keys( lambda z: z in ['name','id','email','login_id','sortable_name'], y), \ json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) ) ) #names_from_ilearn = list( [x.lower() for x in map( str, sorted(list(funcy.pluck('name',from_ilearn)))) ] ) from_ilearn_df = pd.DataFrame(from_ilearn) manual_names = manualNames() names_lookup = funcy.partial(find_ilearn_record, from_ilearn, manual_names) #print(from_ilearn_df) #if input('q to quit ')=='q': return #print(schedule_one_yr) #print("This is one year schedule.") #input('\npress enter to continue') teacher_info = teacher_basic_info(schedule_one_yr, from_ilearn_df, names_lookup) #print(teacher_info) #input('\nThis is teacher info.\npress enter to continue') modality_history = teacherModalityHistory(schedule_one_yr,names_lookup) print(modality_history) #print("This is teacher modality history.") #input('\npress enter to continue') master = pd.merge( modality_history, teacher_info, on='teacher', how='outer') print(master) master.to_csv('cache/trash/joined1.csv') print(master.columns) #input('\nThis is Joined 1.\npress enter to continue') wp = read_bootcamp1('cache/teacherdata/bootcamp_passed.csv') #print(wp) master2 = pd.merge( master, wp, left_on='canvasid_x', right_on='bc1canvasid', how='outer') master2.to_csv('cache/trash/joined2.csv') print(master2) print(master2.columns) #input('\nThis is Joined 2.\npress enter to continue') wp = read_bootcamp2('cache/teacherdata/bootcamp_active.csv') master3 = pd.merge( master2, wp, left_on='canvasid_x', right_on='bc2canvasid', how='outer') master3.to_csv('cache/trash/joined3.csv') print(master3) print(master3.columns) #input('\nThis is Joined 3.\npress enter to continue') # THE VIEWS / HISTORY. UPDATE with get_recent_views() .... check it for appropriate dates.... 
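    # Shape note (assumption, based on what get_recent_views() writes):
    # activitysummary.json maps a canvas id to a 6-element list, roughly
    #   [canvas_id, name, total_hits, goo_number, [dates, per_day_counts], meta]
    # which is why the DataFrame below is built with orient='index' and the
    # six column names cid / cname / views / goo / dates / dateviews.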
views = json.loads( codecs.open('cache/teacherdata/activitysummary.json','r','utf-8').read() ) vdf = pd.DataFrame.from_dict(views,orient='index',columns=['cid','cname','views','goo','dates','dateviews']) print(vdf) #input('k') #master3.set_index('canvasid_x') master3 = pd.merge(master3, vdf, left_on='canvasid_x', right_on='cid',how='outer') dir_records = pd.DataFrame(staff_dir()) dir_records['email'] = dir_records['email'].str.lower() master3['email'] = master3['email'].str.lower() print(dir_records) master3 = pd.merge(master3, dir_records, on='email',how='outer') print(master3) #if input('q to quit ')=='q': return #master3.fillna(0, inplace=True) #master3['views'] = master3['views'].astype(int) #master3['num_sections_last_year'] = master3['num_sections_last_year'].astype(int) #cmte = pd.read_csv('cache/teacherdata/committees_2018_2019.csv') cmte,cmte_by_name = read_cmte(names_lookup) cmte_str_by_name = {} for k in cmte_by_name.keys(): #print(k) #print(cmte_by_name[k]) cmte_str_by_name[k] = ",".join(cmte_by_name[k]) cc = pd.DataFrame.from_dict(cmte_str_by_name,orient='index',columns=['committees']) # 'teacher', cc.reset_index(inplace=True) master4 = pd.merge(master3, cc, left_on='name', right_on='index', how='outer') master4.to_csv('cache/trash/joined4.csv') master4.drop(['teacher','ilearn_name','canvasid_y','bc1canvasid','bc2canvasid','cid','cname','index_y'],axis=1,inplace=True) # Exclude surveys for now """ survey_2017 = pd.read_csv('cache/teacherdata/FA2017 Faculty Survey.csv') survey_2017 = add_realnames(survey_2017,names_lookup) survey_2017.to_csv('cache/trash/survey1.csv') master5 = pd.merge(master4, survey_2017, left_on='name', right_on='ilearn_name', how='left') master5.to_csv('cache/trash/joined5.csv') survey_2019 = pd.read_csv('cache/teacherdata/SP2019 Faculty Survey.csv') survey_2019 = add_realnames(survey_2019,names_lookup) master6 = pd.merge(master5, survey_2019, left_on='name', right_on='ilearn_name', how='left') master6.to_csv('cache/trash/joined6.csv') newnames = [ x.strip() for x in open('cache/poll_question_names.txt','r').readlines() ] namedict = {} for i,n in enumerate(newnames): if i%3==1: newname = n if i%3==2: namedict[oldname] = newname if i%3==0: oldname = n master6 = master6.rename(columns=namedict) master6.to_csv('cache/teacherdata/staff_main_table.csv') master6.to_csv('cache/teacherdata/staff_main_table.csv') """ master4.to_csv('cache/teacherdata/staff_main_table.csv') master4.to_csv('gui/public/staff_main_table.csv') other_training_records = read_training_records() #print(json.dumps(other_training_records,indent=2)) #print("This is misc workshops.") tt = pd.DataFrame.from_dict(other_training_records,orient='index') tt = tt.fillna("") #print(tt) #input('\npress enter to continue') #teacherSharedCourses(schedule_one_yr) #getAllTeachersInTerm() # TODO - broken def enroll_staff_shell(): pass """staff = users_with_gavilan_email() for i,s in staff.iterrows(): print(s['canvasid'],s['name']) u = url + '/api/v1/courses/8528/enrollments' param = { 'enrollment[user_id]':s['canvasid'], 'enrollment[type]': 'StudentEnrollment', 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) """ #"Jun 28 2018 at 7:40AM" -> "%b %d %Y at %I:%M%p" #"September 18, 2017, 22:19:55" -> "%B %d, %Y, %H:%M:%S" #"Sun,05/12/99,12:30PM" -> "%a,%d/%m/%y,%I:%M%p" #"Mon, 21 March, 2015" -> "%a, %d %B, %Y" #"2018-03-12T10:12:45Z" -> "%Y-%m-%dT%H:%M:%SZ" # take a list of raw hits. 
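# Small sketch of the timestamp handling used by the activity functions that
# follow: Canvas page views carry ISO-8601 created_at strings (see the format
# notes above), which are parsed with dateutil and compared in UTC. The sample
# value here is made up.
def _example_parse_hit_timestamp():
    raw = "2018-03-12T10:12:45Z"                       # shape of h['created_at']
    stamp = parser.parse(raw)                          # tz-aware UTC datetime
    cutoff = dt.now().replace(tzinfo=pytz.timezone('UTC')) - datetime.timedelta(days=14)
    return stamp > cutoff                              # True when the hit is recent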
def activity_summary(hits): #infile = "cache/teacherdata/activity/G00101483.json" #data = json.loads(open(infile,'r').read()) #hits = data['raw'] if not hits: return [ [], [], ] dt_list = [] one_week = datetime.timedelta(days=14) # actually two.... today = dt.now().replace(tzinfo=pytz.timezone('UTC')) target = today - one_week for h in hits: the_stamp = parser.parse(h['created_at']) if the_stamp > target: dt_list.append(the_stamp) df = pd.DataFrame(dt_list, columns=['date',]) df.set_index('date', drop=False, inplace=True) df.rename(columns={'date':'hits'}, inplace=True) #df.resample('1D').count().plot(kind='bar') #return df.resample('1D').count().to_json(date_format='iso') #print(hits) #print(df) if not df.size: return [ [], [], ] bins = df.resample('1D').count().reset_index() bins['date'] = bins['date'].apply(str) #print(bins) return [bins['date'].to_list(), bins['hits'].to_list()] #plt.show() #df = df.groupby([df['date'].dt.to_period('D')]).count().unstack() #df.groupby(TimeGrouper(freq='10Min')).count().plot(kind='bar') #df.plot(kind='bar') # next step # 1. save timestamp of the fetch # # 2. parse it and only fetch since then. afterwards, pull out non-hits. Summarize day/week/month stats. # # 2a. merge old and new records, and re-summarize. # # 3. Next improvements in GUI. hook up to python server backend. # # Get views counts on current teachers. todo: month is hardcoded here def get_recent_views(id=1): dt_format = "%Y-%m-%dT%H:%M:%SZ" default_start_time = dt.strptime("2020-08-14T00:00:00Z", dt_format) default_start_time = default_start_time.replace(tzinfo=pytz.timezone('UTC')) end_time = dt.now(pytz.utc) print("End time is: %s" % str(end_time)) myheaders = "x,teacher,prct_online,prct_lecture,prct_hybrid,num_sections_last_year,canvasid_x,name,sortable_name,goo,email,index_x,courses,dept,ilearn_name_x,canvasid_y,canvasid_x,bootcamp_grade,bootcamp_date_x,canvasid_y,bootcamp_progress,bootcamp_date_y,index_y,committees".split(",") teachers = [row for row in csv.reader(open('cache/teacherdata/staff_main_table.csv','r'))][1:] #tt = teachers[6:10] summary = {} for t in teachers: name = t[1] if name=="" or name=="TBA": continue if not t[6]: continue the_id = int(float(t[6])) if the_id == 290: continue # STAFF STAFF goo = t[9] print(goo) # read log of this person: try: prev_logf = codecs.open('cache/teacherdata/activity/%s.json' % goo,'r','utf-8') prev_log = json.loads(prev_logf.read()) prev_logf.close() except: print("Exception happened on reading previous temp logs.") prev_log = '' if type(prev_log) == dict: lastfetch = dt.strptime(prev_log['meta']['lastfetch'], dt_format) lastfetch = lastfetch.replace(tzinfo=pytz.timezone('UTC')) print("last fetch is: " + str(lastfetch)) print("Hits BEFORE was: %i" % len(prev_log['raw'])) else: lastfetch = default_start_time prev_log = { "raw":[], } end_time = dt.now(pytz.utc) u = url + "/api/v1/users/%s/page_views?start_time=%s&end_time=%s&per_page=100" % (str(the_id),lastfetch.strftime(dt_format), end_time.strftime(dt_format)) #print(u) #input('getting this url') print(name + "\t",end='\n') if 1: # get fresh data? 
r = fetch(u) prev_log['raw'].extend( r ) summ = activity_summary(prev_log['raw']) mydata = {'meta':{'lastfetch':end_time.strftime(dt_format)},'summary':summ,'raw':prev_log['raw']} codecs.open('cache/teacherdata/activity/%s.json' % goo,'w','utf-8').write( json.dumps(mydata,indent=2)) summary[the_id] = [the_id, name, len(prev_log['raw']),goo, summ ,mydata['meta']] print("Hits AFTER is: %i" % len(prev_log['raw'])) codecs.open('cache/teacherdata/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) codecs.open('gui/public/activitysummary.json','w','utf-8').write( json.dumps(summary,indent=2) ) # TODO broken? # Have they taught online or hybrid classes? """ def categorize_user(u): global role_table, term_courses their_courses = get_enrlmts_for_user(u, role_table) num_s = 0 num_t = 0 type = 's' online_only = 1 is_online = [] #print their_courses for x in their_courses.iterrows(): if len(x): ttype = x[1]['type'] if ttype=='StudentEnrollment': num_s += 1 if ttype=='TeacherEnrollment': num_t += 1 cid = x[1]['course_id'] current_term = term_courses[lambda x: x['id']==cid] if not current_term.empty: is_online.append(current_term['is_online'].values[0]) else: online_only = 0 else: online_only = 0 if num_t > num_s: type='t' if len(is_online)==0: online_only = 0 for i in is_online: if i==0: online_only = 0 #print "Type: " + type + " All online: " + str(online_only) + " Number courses this term: " + str(len(is_online)) return (u[0],type, online_only, len(is_online)) """ ########## ########## ########## PHOTOS ########## ########## # todo: threaded # Doest the account have a photo loaded? def checkForAvatar(id=2): try: t = url + '/api/v1/users/%s?include[]=last_login' % str(id) r2 = requests.get(t, headers = header) result = json.loads(r2.text) codecs.open('cache/users/%s.txt' % str(id),'w','utf-8').write( json.dumps(result,indent=2) ) if 'avatar_url' in result: if re.search(r'avatar\-50',result['avatar_url']): return 0 else: return (result['login_id'], result['avatar_url'], result['name']) except Exception as e: print("Looking for an avatar / profile pic had a problem: %s" % str(e)) return 0 # Grab em. Change the first if when continuing after problems.... def downloadPhoto(): import imghdr pix_dir = 'cache/picsCanvas2022/' # Update the list of all ilearn users? i_last_ix = '-1' photo_log_f = '' if 0: ## CHANGE TO 0 IF CRASHED / RESUMING.... 
ii = fetchAllUsers() photo_log_f = open("cache/fotolog.txt", "w") else: ii = json.loads(codecs.open('cache/allusers_ids.json','r').read()) photo_log_f = open("cache/fotolog.txt", "r+") i_last_ix = -1 try: ab = photo_log_f.read() print(ab) ac = ab.split("\n") print(ac) i_last_ix = ac[-2] print(i_last_ix) except: i_last_ix = -1 i_last_ix = int(i_last_ix) print("Last user index checked was: %s, which is id: %s" % \ (i_last_ix, ii[i_last_ix] )) print("Max index is: %i" % len(ii)) i_last_ix += 1 for index in range(i_last_ix, len(ii)): i = ii[index] photo_log_f.write("\n%i" % i ) a = checkForAvatar(i) if a: print(str(i) + ":\t" + str(a[0]) + "\t" + str(a[2]) ) try: r = requests.get(a[1], stream=True) if r.status_code == 200: r.raw.decode_content = True h=r.raw with open(pix_dir + a[0].lower(), 'wb') as f: shutil.copyfileobj(h, f) # rename to right file extension # TODO: Change imghdr to PILLOW (PIL) #img = Image.open(filename) #img_type = img.format # 'JPEG' img_type = imghdr.what(pix_dir + a[0].lower()) if img_type == 'jpeg': img_type = 'jpg' try: shutil.move(pix_dir + a[0].lower(),pix_dir + a[0].lower()+'.'+img_type) except Exception as e: print(" \tCouldn't rewrite file") else: print(str(i) + ":\t didn't get expected photo") except Exception as e: print(" \tProblem with download " + str(e)) else: print(str(i) + ":\tno user or no photo") pass def mergePhotoFolders(): staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] headers = staff[0] staff = staff[1:] activestaff = [] for i,h in enumerate(headers): #print("%i. %s" % (i,h) ) pass for S in staff: if S[7] and S[15]: # if teacher (name present) and sp20crns (taught in sp20) activestaff.append(S[9].lower()) activestaffset=set(activestaff) #return a = 'cache/picsCanvas' b = 'gui/public/picsCanvas2018' c = 'gui/public/picsCanvasAll' # I want a big list of who has an avatar pic. # and i want to know how many updated since last DL, and how many are in only one or the other. old = os.listdir(b) count = defaultdict(int) oldset = set() newset = set() for O in old: if O.endswith('.jpg') or O.endswith('.png'): g = O.split(r'.')[0] oldset.add(g) for N in os.listdir(a): if N.endswith('.jpg') or N.endswith('.png'): g = N.split(r'.')[0] newset.add(g) """print("Active SP20 Teachers") print(activestaffset) print("Old Avatars") print(oldset) print("New Avatars") print(newset)""" updated_set = oldset.union(newset) tch_set = updated_set.intersection(activestaffset) only_old = oldset.difference(newset) only_new = newset.difference(oldset) print("Tch: %i Old: %i New: %i" % (len(activestaffset),len(oldset),len(newset))) print("All avatars: %i Teachers: %i Only in old: %i Only in new: %i" % ( len(updated_set), len(tch_set), len(only_old), len(only_new))) allpics = os.listdir(c) haveapic = {} for A in allpics: if A.endswith('.jpg') or A.endswith('.png'): g = (A.split(r'.')[0]).upper() haveapic[g] = A outie = codecs.open('gui/public/pics.json','w').write( json.dumps( haveapic,indent=2)) def mergePhotoFolders2(): staff = [ row for row in csv.reader( open('cache/teacherdata/staff_main_table.csv','r') ) ] headers = staff[0] staff = staff[1:] activestaff = [] for i,h in enumerate(headers): #print("%i. 
%s" % (i,h) ) pass for S in staff: if S[5]: activestaff.append(S[9].lower()) a = 'cache/picsCanvas' b = 'gui/public/picsCanvas2018' c = 'gui/public/picsCanvasAll' old = os.listdir(b) count = defaultdict(int) for N in os.listdir(a): if N.endswith('.jpg') or N.endswith('.png'): g = N.split(r'.')[0] if g in activestaff: count['s'] += 1 if N in old: #print( "Y - %s" % N) count['y'] += 1 else: #print( "N - %s" %N ) count['n'] += 1 else: #print("x - %s" % N) count['x'] += 1 print("Of the 2020 avatars, %i are in the 2018 folder, and %i are new." % (count['y'],count['n'])) print("Of %i active teachers, %i have avatars." % (len(activestaff),count['s'])) #print(json.dumps(count,indent=2)) # Go through my local profile pics, upload any that are missing. def uploadPhoto(): files = os.listdir('pics2017') #print json.dumps(files) pics_i_have = {} #goo = "g00188606" canvas_users = json.loads(open('canvas/users.json','r').read()) t = url + '/api/v1/users/self/files' i = 0 j = 0 pics_dir = 'pics2017/' for x in canvas_users: j += 1 if x['login_id'].lower() + '.jpg' in files: #print x['login_id'] + " " + x['name'] i += 1 pics_i_have[x['id']] = x print('Canvas users: ' + str(j)) print('Pic matches: ' + str(i)) account_count = 0 ids_i_uploaded = [] for id, target in list(pics_i_have.items()): #if account_count > 50: # print 'Stopping after 5.' # break print('trying ' + target['name'] + '(' + str(id) + ')') if checkForAvatar(id): print("Seems to have avatar loaded.") continue goo = target['login_id'].lower() local_img = pics_dir + goo + '.jpg' inform_parameters = { 'name':goo + '.jpg', 'size':os.path.getsize(local_img), # read the filesize 'content_type':'image/jpeg', 'parent_folder_path':'profile pictures', 'as_user_id':'{0}'.format(id) } res = requests.post(t, headers = header, data=inform_parameters) print("Done prepping Canvas for upload, now sending the data...") json_res = json.loads(res.text,object_pairs_hook=collections.OrderedDict) files = {'file':open(local_img,'rb').read()} _data = list(json_res.items()) _data[1] = ('upload_params',list(_data[1][1].items())) print("Yes! Done sending pre-emptive 'here comes data' data, now uploading the file...") upload_file_response = requests.post(json_res['upload_url'],data=_data[1][1],files=files,allow_redirects=False) # Step 3: Confirm upload print("Done uploading the file, now confirming the upload...") confirmation = requests.post(upload_file_response.headers['location'],headers=header) if 'id' in confirmation.json(): file_id = confirmation.json()['id'] else: print('no id here') #print(confirmation.json()) print("upload confirmed...nicely done!") time.sleep(1) # Make api call to set avatar image to the token of the uploaded imaged (file_id) params = { 'as_user_id':'{0}'.format(id)} avatar_options = requests.get(url + "/api/v1/users/%s/avatars"% '{0}'.format(id),headers=header,params=params) #print "\nAvatar options: " #print avatar_options.json() for ao in avatar_options.json(): #print ao.keys() if ao.get('display_name')==goo + '.jpg': #print("avatar option found...") #print((ao.get('display_name'),ao.get('token'), ao.get('url'))) params['user[avatar][token]'] = ao.get('token') set_avatar_user = requests.put(url + "/api/v1/users/%s"% '{0}'.format(id),headers=header,params=params) if set_avatar_user.status_code == 200: print(('success uploading user avatar for {0}'.format(id))) account_count += 1 ids_i_uploaded.append(id) else: print('some problem setting avatar') else: pass #print 'didnt get right display name?' 
print("Uploaded these guys: " + json.dumps(ids_i_uploaded)) ########## ########## ########## EMAILING PEOPLE ########## ########## #def test_email(): # send_z_email("Peter Howell", "Peter", "phowell@gavilan.edu", ['CSIS85','CSIS42']) def create_ztc_list(): course_combos = pd.read_csv('cache/teacher_course_oer_email_list.csv') course_combos.fillna('',inplace=True) # read this file and make it a dict (in one line!) dept_counts = { x[0]:x[1].strip() for x in [ y.split(',') for y in open('cache/teacher_course_oer_deptcount.csv','r').readlines() ][1:] } course_template = "%s    " url_template = "https://docs.google.com/forms/d/e/1FAIpQLSfZLQp6wHFEdqsmpZ7jz2Y8HtKLo8XTAhrE2fyvTDOEgquBDQ/viewform?usp=pp_url&entry.783353363=%s&entry.1130271051=%s" # % (FULLNAME, COURSE1) # list depts mydepts = sorted(list(set(course_combos['dept'] ))) i = 0 outp = open("output/oer_email_list.csv","w") outp.write("fullname,firstname,email,link,courses\n") ones_i_did = [ int(x) for x in "40 38 31 21 7 12 24 25 1 13 18 22 44 55 56 51 20 16 2 3 4 5 6 8 9 10 11 14 15 17 23 53 52 50 30 48 39 37 54 49 47 46 45 43 42 41 33 32 29 28 27 26".split(" ") ] for D in mydepts: i += 1 extra = '' if D in dept_counts: extra = " (%s)" % dept_counts[D] extra2 = '' if i in ones_i_did: extra2 = "xxxx " print("%s %i. %s %s" % (extra2,i,D,extra)) choice_list = input("Which department? (for multiple, separate with spaces) ").split(' ') all_people_df = [] for choice in choice_list: is_cs = course_combos['dept']==mydepts[int(choice)-1] filtered = pd.DataFrame(course_combos[is_cs]) if len(all_people_df): all_people_df = pd.concat([filtered,all_people_df]) else: all_people_df = filtered print(mydepts[int(choice)-1]) print(all_people_df) print(' ') all_people_df.sort_values(by=['name'],inplace=True) print(all_people_df) b = all_people_df.groupby(['name']) for name,group in b: if name == 'no data': continue nameparts = name.split(', ') fullname = nameparts[1] + ' ' + nameparts[0] firstname = nameparts[1] outp.write(fullname + ',' + firstname + ',') email = '' link = '' courses = [] flag = 1 for i in group.iterrows(): g = i[1] # wtf is this shi..... this_course = g.dept + ' ' + str(g.codenum) + g.codeletter courses.append( this_course ) #print(g) email = g.email if flag: link = url_template % (fullname, this_course) flag = 0 outp.write(email + ',' + link + "," + " ".join(courses) + "\n") outp.close() ########## ########## ########## FORENSICS TYPE STUFF ########## ########## # better name for this standard fetch. so they stay together in alpha order too.... def get_user_info(id): u = fetch( '/api/v1/users/%i' % id ) ff = codecs.open('cache/users/%i.txt' % id, 'w', 'utf-8') ff.write( json.dumps(u, indent=2)) return u # these are any messages that get pushed out to their email def comm_mssgs_for_user(uid=0): if not uid: uid = input('Canvas id of the user? ') u = url + '/api/v1/comm_messages?user_id=%s&start_time=%s&end_time=%s' % (uid,'2021-01-01T01:01:01Z','2021-08-01T01:01:01Z') # &filter[]=user_%s' % uid convos = fetch(u,1) oo = codecs.open('cache/comms_push_user_%s.txt' % str(uid), 'w') oo.write('USER %s\n' % uid) oo.write(json.dumps(convos, indent=2)) print(convos) # def convos_for_user(uid=0): if not uid: uid = input('Canvas id of the user? 
') u = url + '/api/v1/conversations?include_all_conversation_ids=true&as_user_id=%s' % uid # &filter[]=user_%s' % uid convos = fetch(u,1) oo = codecs.open('cache/convo_user_%s.txt' % str(uid), 'w') oo.write('USER %s\n' % uid) oo.write(json.dumps(convos, indent=2)) convo_ids_list = convos["conversation_ids"] print(convo_ids_list) u2 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=archived&as_user_id=%s' % uid # &filter[]=user_%s' % uid archived_convos = fetch(u2,1) try: aconvo_ids_list = archived_convos["conversations_ids"] print(aconvo_ids_list) except: print("didnt seem to be any archived.") aconvo_ids_list = [] u3 = url + '/api/v1/conversations?include_all_conversation_ids=true&scope=sent&as_user_id=%s' % uid # &filter[]=user_%s' % uid sent_convos = fetch(u3,1) try: sconvo_ids_list = sent_convos["conversations_ids"] print(sconvo_ids_list) except: print("didnt seem to be any sent.") sconvo_ids_list = [] convo_ids_list.extend(aconvo_ids_list) convo_ids_list.extend(sconvo_ids_list) ## ## Now get all the messages in each of these conversations ## for cid in convo_ids_list: print("Fetching conversation id: %s" % cid) oo.write("\n\n----------------\nconversation id: %s\n\n" % cid) u4 = url + '/api/v1/conversations/%s?as_user_id=%s' % (cid,uid) # ' % (cid, uid coverstn = fetch(u4,1) oo.write("\n%s\n\n" % json.dumps(coverstn,indent=2)) """ for c in convos: c['participants'] = ", ".join([ x['name'] for x in c['participants'] ]) includes = tuple("last_message subject last_message_at participants".split(" ")) convos = list( \ reversed([ funcy.project(x, includes) for x in convos ])) """ # #print(json.dumps(convos, indent=2)) # single q sub def quiz_get_sub(courseid, quizid, subid=0): u = url + "/api/v1/courses/%s/quizzes/%s/submissions/%s" % ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/quizzes/%s/questions?quiz_submission_id=%s" % \ ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/assignments/%s/submissions/%s?include[]=submission_history" % \ ( str(courseid), str(quizid), str(subid) ) u = url + "/api/v1/courses/%s/students/submissions?student_ids[]=all&include=submission_history&grouped=true&workflow_state=submitted" % str(courseid) return fetch(u) #?quiz_submission_id=%s" # quiz submissions for quiz id x, in course id y def quiz_submissions(courseid=9768, quizid=32580): #subs = quiz_get_sub(courseid, quizid) #print( json.dumps( subs, indent=2 ) ) if 1: # POST data = { "quiz_report[includes_all_versions]": "true", "quiz_report[report_type]": "student_analysis" } u = url + "/api/v1/courses/%s/quizzes/%s/reports?" 
% ( str(courseid), str(quizid) ) res = requests.post(u, headers = header, data=data) print(res.content) #u2 = url + "/api/v1/courses/%s/quizzes/%s/reports" % ( str(courseid), str(quizid) ) #res2 = fetch(u2) #print( json.dumps(res2.content, indent=2)) jres2 = json.loads( res.content ) print(jres2) if jres2['file'] and jres2['file']['url']: u3 = jres2['file']['url'] r = requests.get(u3, headers=header, allow_redirects=True) open('cache/quizreport.txt', 'wb').write(r.content) return for R in res2: if R['id'] == 7124: u3 = R['url'] r = requests.get(u3, headers=header, allow_redirects=True) open('cache/quizreport.txt', 'wb').write(r.content) return u3 = url + "/api/v1/courses/%s/quizzes/%s/reports/%s" % ( str(courseid), str(quizid), res2[''] ) oo = codecs.open('cache/submissions.json','w', 'utf-8') oo.write('[\n') for s in subs: if len(s['submissions']): j = json.dumps(s, indent=2) print(j) oo.write(j) oo.write('\n') oo.write('\n]\n') return 0 #u = url + "/api/v1/courses/%s/quizzes/%s/submissions?include[]=submission" % (str(courseid), str(quizid)) u = url + "/api/v1/courses/%s/quizzes/%s/submissions" % (str(courseid), str(quizid)) subs = fetch(u, 0) print( json.dumps( subs, indent=1 ) ) for S in subs['quiz_submissions']: print(json.dumps(S)) submis = quiz_get_sub(courseid, quizid, S['id']) print(json.dumps(submis, indent=2)) # return (timeblock, course, read=0,write=1) def requests_line(line,i=0): try: L = line # strip? if type(L) == type(b'abc'): L = line.decode('utf-8') for pattern in unwanted_req_paths: if pattern in L: return 0 i = 0 line_parts = list(csv.reader( [L] ))[0] #for p in line_parts: # print("%i\t%s" % (i, p)) # i += 1 d = parser.parse(line_parts[7]) d = d.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) d = timeblock_24hr_from_dt(d) #r = re.search('context\'\:\s(\d+)', line_parts[22]) #c = 0 #if r: # c = r.groups(1) str1 = line_parts[20] str2 = str1.replace("'",'"') str2 = str2.replace("None",'""') #print(str2) j = json.loads(str2 ) c = j['context'] a = line_parts[5] #print( str( (d, c, a) )) return (d, str(c), a) except Exception as e: #print("Exception: " + str(e)) return 0 # def report_logs(id=0): if not id: L = ['10531', ] else: L = [ id, ] report = [] for id in L: emt_by_id = course_enrollment(id) for U in emt_by_id.values(): user_d = defaultdict( int ) print( "Lookin at user: %s" % U['user']['name'] ) report.append( "User: %s\n" % U['user']['name'] ) log_file_name = 'cache/users/logs/%i.csv' % U['user']['id'] if path.exists(log_file_name): print("Log file %s exists" % log_file_name) temp = open(log_file_name, 'r').readlines() for T in temp[1:]: #print(T) result = requests_line(T) if result: (d, c, a) = result if c == id: user_d[d] += 1 print(json.dumps(user_d, indent=2)) for V in sorted(user_d.keys()): report.append( "\t%s: %i\n" % ( dt_from_24hr_timeblock(V), user_d[V]) ) report.append("\n\n") return report def track_users_in_sem(): L = users_this_semester_db() sL = list(L) sL.sort(reverse=True) fetch_queue = queue.Queue() for i in range(num_threads): worker = Thread(target=track_user_q, args=(i,fetch_queue)) worker.setDaemon(True) worker.start() for U in sL: print( "adding %s to the queue" % U ) fetch_queue.put( U ) fetch_queue.join() print("Done.") def track_users_in_class(L=[]): if len(L)==0: #id = '10531' ids = input("Course ids, separated with comma: ") L = [x for x in ids.split(',')] print("Getting users in: " + str(L)) fetch_queue = queue.Queue() for i in range(num_threads): worker = Thread(target=track_user_q, args=(i,fetch_queue)) 
worker.setDaemon(True) worker.start() users_set = set() for id in L: emt_by_id = course_enrollment(id) print(emt_by_id) for U in emt_by_id.values(): if not U['user_id'] in users_set: print(U) print( "adding %s to the queue" % U['user']['name'] ) fetch_queue.put( U['user_id'] ) users_set.add(U['user_id']) all_reports = [] fetch_queue.join() print("Done with %i users in these courses." % len(users_set)) for id in L: rpt = report_logs(id) all_reports.append(rpt) outp = codecs.open('cache/courses/report_%s.txt' % id, 'w', 'utf-8') outp.write(''.join(rpt)) outp.close() return all_reports def track_user_q(id, q): while True: user = q.get() print("Thread %i: Going to download user %s" % (id, str(user))) try: track_user(user, id) except FetchError as e: pass q.task_done() def count_courses(csv_file="cache/users/logs/45268.csv"): course_counts = defaultdict(int) course_regex = r'courses/(\d+)/' with open(csv_file, mode='r') as file: reader = csv.reader(file) for row in reader: match = re.search(course_regex, row[1]) if match: course_id = match.group(1) course_counts[course_id] += 1 for course_id, count in course_counts.items(): print(f"Course ID {course_id}: {count} occurrences") # honestly it doesn't make much sense to get full histories this way if they're # already in the canvas data tables.... # just the most recent hits or a short period # # Live data would be better. # Maintain local logs. Look to see if we have some, download logs since then for a user. def track_user(id=0,qid=0): global recvd_date L = [id,] if not id: ids = input("User ids (1 or more separated by comma): ") L = [int(x) for x in ids.split(',')] print("Getting users: " + json.dumps(L)) for id in L: id = int(id) # Open info file if it exists, check for last day retrived try: infofile = open("cache/users/%i.txt" % id, 'r') info = json.loads( infofile.read() ) # TODO: set up this info file if it isn't there. check any changes too. it # was written where?.... 
infofile.close() except Exception as e: print("failed to open info file for user id %i" % id) info = get_user_info(id) print("(%i) Student %i Info: " % (qid,id)) #print( json.dumps(info, indent=2)) url_addition = "" if 1: # hard code dates start_date = "2021-01-01T00:00:00-07:00" end_date = dt.now().strftime("%Y-%m-%dT%H:%M:%S-07:00") # end_date = "2026-07-01T00:00:00-07:00" url_addition = f"?start_time={start_date}&end_time={end_date}" elif 'last_days_log' in info: print("There's existing log data for %s (%s)" % (info['name'] , info['sis_user_id'])) print("Last day logged was: %s" % info['last_days_log']) url_addition = "?start_time=%s" % info['last_days_log'] the_stamp = parser.parse(info['last_days_log']) the_stamp = the_stamp.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) now = dt.now() now = now.replace(tzinfo=pytz.timezone('UTC')).astimezone(pytz.timezone('US/Pacific')) dif = now - the_stamp print("It was %s ago" % str(dif)) if the_stamp < lds_stamp: print("Too long, taking default") url_addition = "?start_time=%s" % log_default_startdate #lds_stamp = parser.parse(log_default_startdate) ########## else: url_addition = "?start_time=%s" % log_default_startdate #if dif.days > 1: url = "/api/v1/users/%i/page_views%s" % (id, url_addition) print(url) try: api_gen = fetch_stream(url,0) log_file_name = 'cache/users/logs/%i.csv' % id if path.exists(log_file_name): print("Log file %s exists" % log_file_name) temp = open(log_file_name, 'a', newline='') csv_writer = csv.writer(temp) else: print("Creating new log file: %s" % log_file_name) temp = open(log_file_name, 'w', newline='') ### TODO csv_writer = csv.writer(temp) count = 0 for result in api_gen: if count == 0 and len(result): header = result[0].keys() csv_writer.writerow(header) # results come in newest first.... recvd_date = result[0]['updated_at'] print("(%i) Most recent hit is %s" % (qid,recvd_date)) count += len(result) indent = " " * qid #print("(%i) Got %i records, %i so far" % (qid,len(result),count)) print("(%s - %i) %s %i" % (qid, id, indent, count)) if count > max_log_count: print("Too many logs, bailing. 
sorry.") break for R in result: csv_writer.writerow(R.values()) latest = parser.parse(recvd_date) #last_full_day = (latest - timedelta(days=1)).isoformat() info['last_days_log'] = recvd_date #last_full_day infofile = open("cache/users/%i.txt" % id, 'w') infofile.write(json.dumps( info, indent=2 )) infofile.close() print("(%i) Output to 'cache/users/log/%i.csv'" % (qid,id)) except FetchError as e: print("Getting a 502 error.") raise FetchError() except Exception as e2: print("Got an error receiving logs: %s" % str(e2)) # def track_users_by_teacherclass(): all_teachers = teachers_courses_semester() skip_to = "Punit Kamrah" skipping = 1 grouped = funcy.group_by( lambda x: x[4], all_teachers ) g2 = {} for k,v in grouped.items(): print(k) if skipping and skip_to != k: print("skipping") continue skipping = 0 g2[k] = list(funcy.distinct( v, 1 )) print("\n\n\n\n\n") print(k) print("\n\n\n\n\n") teacherfile = codecs.open('cache/teacherdata/reports/%s.txt' % k.replace(" ","_"),'w','utf-8') class_ids = funcy.lpluck(1,v) class_names = funcy.lpluck(2,v) print(class_ids) print(class_names) rpts = track_users_in_class(class_ids) for i, R in enumerate(rpts): teacherfile.write('\n\n\n---\n\n%s \n\n' % class_names[i]) teacherfile.write(''.join(R)) teacherfile.flush() teacherfile.close() print(json.dumps(g2, indent=2)) def section_enroll(): user = input("user id> ") sect = input("section id> ") u = f"{url}/api/v1/sections/{sect}/enrollments" param = { 'enrollment[user_id]':user, 'enrollment[type]': 'StudentEnrollment', 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) def one_course_enrol(): users = input("user,ids,separated,by,commas> ").split(',') course = input("course id> ") my_types = {1:'StudentEnrollment', 2:'TeacherEnrollment'} print(json.dumps(my_types,indent=2)) the_type = my_types[ int(input("enrollment type> ")) ] # the_type = 'StudentEnrollment' # the_type = 'TeacherEnrollment' u = f"{url}/api/v1/courses/{course}/enrollments" for user in users: param = { 'enrollment[user_id]':user, 'enrollment[type]': the_type, 'enrollment[enrollment_state]': 'active', } res = requests.post(u, headers = header, data=param) print(res.text) def find_new_teachers(): filename = "cache/fa22_sched.json" jj = json.loads(codecs.open(filename,'r','utf-8').read()) for J in jj: print( J['teacher']) import traceback def find_no_goo(): DO_DELETE_USERS = 0 DO_DELETE_PORTFOLIOS = 0 output = codecs.open('cache/no_goo_numbers.json','w','utf-8') output2 = codecs.open('cache/wrong_root_acct.json','w','utf-8') output3 = codecs.open('cache/wrong_sis_import_id.json','w','utf-8') output4 = codecs.open('cache/bad_portfolios.json','w','utf-8') #output5 = codecs.open('cache/bad_portfolios_detail.html','w','utf-8') all = [] no_root = [] no_sis = [] port = [] i = 0 j = 0 k = 0 p = 0 users = json.loads(codecs.open('cache/allusers.json','r','utf-8').read()) for u in users: if not 'login_id' in u: print(u['name']) i+=1 all.append(u) user_port = [] pp = fetch(url + '/api/v1/users/%s/eportfolios' % str(u['id'])) for p_user in pp: try: user_port.append( fetch(url+'/api/v1/eportfolios/%s' % str(p_user['id']) ) ) if DO_DELETE_PORTFOLIOS: #output5.write("
deleted: %s\n" % (str(p_user['id']),str(p_user['id'])) ) #output5.flush() del_request = requests.delete(url + "/api/v1/eportfolios/%s" % str(p_user['id']) ,headers=header) print(del_request.text) except Exception as e: traceback.print_exc() p += len(pp) port.append(pp) if DO_DELETE_USERS: print("Deleting %s..." % u['name']) del_request = requests.delete(url + "/api/v1/accounts/1/users/%s" % str(u['id']) ,headers=header) print(del_request.text) if 'root_account' in u and u['root_account'] != "ilearn.gavilan.edu": no_root.append(u) j += 1 if 'sis_import_id' in u and not u['sis_import_id']: no_sis.append(u) k += 1 print("Found %i users without G numbers" % i) print("Found %i users with non gav root account" % j) print("Found %i users without sis id" % k) print("Found %i questionable portfolios" % p) output.write( json.dumps(all,indent=2) ) output2.write( json.dumps(no_root,indent=2) ) output3.write( json.dumps(no_sis,indent=2) ) output4.write( json.dumps(port,indent=2) ) def track_a_user(): a = input("User ID? ") track_user(a) def compare_db_tables(): import requests # Download JSON files url_a = 'http://www.gavilan.edu/staff/tlc/db.php?a=dir' url_b = 'http://www.gavilan.edu/staff/tlc/db.php?a=confusers' response_a = requests.get(url_a) print('got 1') response_b = requests.get(url_b) print('got 2') # Parse JSON data data_a = response_a.json() data_b = response_b.json() by_email_conf = {} for item in data_a: by_email_conf[item['email']] = item # Extract email addresses from each file emails_a = {item['email'] for item in data_a} emails_b = {item['email'] for item in data_b} emails_a = {item for item in emails_a if item is not None} emails_b = {item for item in emails_b if item is not None} emails_a = {item.lower() for item in emails_a} emails_b = {item.lower() for item in emails_b} # Find common emails common_emails = emails_a.intersection(emails_b) # Find distinct emails for each file distinct_emails_a = emails_a.difference(emails_b) distinct_emails_b = emails_b.difference(emails_a) # Print the results print("Common Emails:") for email in sorted(list(common_emails)): print(email) print("\nDistinct Emails in Staff directory:") for email in sorted(list(distinct_emails_a)): print(email) print("\nDistinct Emails in conf users table:") for email in sorted(list(distinct_emails_b)): print(email) out = codecs.open('cache/users_fix.sql','w','utf-8') for e in common_emails: out.write(f"update `conf_users` set `p2id`='{by_email_conf[e]['id']}' where lower(`email`)='{e}';\n") def training_find_goos(): from openpyxl import Workbook, load_workbook from openpyxl.chart import BarChart, Series, Reference from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font, Fill wb = load_workbook("C:/Users/peter/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx") print(wb.sheetnames) all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) records = {} sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed','BOOT CAMP','TITLE V GOTT ACADEMY', 'Other Certifications'] for sname in sheets: print(f"{sname}:") sheet = wb[sname] records[sname] = {} for row in sheet.iter_rows(): try: name = row[0].value if row[0].value == 'G Number': continue lastname = name.split(' ')[-1].lower() goo = row[1].value if not goo: print(f" Missing ID: {row[0].value}") for t in all_teachers: if re.search(lastname, t['name'].lower()): print(f" {t['name']}: {t['sis_user_id']}") except Exception as e: pass print() # Cross-reference training records with upcoming sections 
to flag missing GOTT work. def cross_ref_training(): from semesters import find_term from openpyxl import Workbook, load_workbook from openpyxl.chart import BarChart, Series, Reference from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font, Fill wb = load_workbook("C:/Users/phowell/Downloads/GOTT_Completion_masterlist 2023 DEC.xlsx") print(wb.sheetnames) term = find_term("sp26") # Fetch from Canvas DB. Make sure its recently updated. # Also relies on schedule being in database. Run localcache2.courses_to_sched() # OR localcache2.refresh_semester_schedule_db() #courses = all_2x_sem_courses_teachers('202550', '202570') # courses = all_sem_courses_teachers(term['bannercode']) # report for email report = codecs.open('cache/gott_report.txt','w','utf-8') # update local list of teachers from ilearn? RELOAD_TEACHERS = 0 ask = input('download new list of teachers? (y/n) ') if ask.strip()=='y': RELOAD_TEACHERS = 1 RELOAD_SCHEDULE = 0 ask2 = input('refresh schedule db? (y/n) ') if ask2.strip()=='y': RELOAD_SCHEDULE = 1 if RELOAD_SCHEDULE: refresh_semester_schedule_db(term['code']) if RELOAD_TEACHERS: teacherRolesUpdateCache() # TODO inefficient but just read it again all_teachers = json.loads(codecs.open('cache/ilearn_staff.json','r','utf-8').read()) course_to_gp, course_to_area, areacode_to_area, area_to_dean, course_to_dean, dean_code_to_name = campus_dept_hierarchy() records = {} sheets = ['GOTT1', 'GOTT2', 'GOTT4', 'GOTT5', 'GOTT6', 'HUM.STEM', 'POCR Reviewed', 'SU21 Workshop', 'BOOT CAMP', 'GOTT ABC', 'TITLE V GOTT ACADEMY', 'Other Certifications'] for sname in sheets: print(f"{sname}:") sheet = wb[sname] records[sname] = {} for row in sheet.iter_rows(): if row[1].value == 'ID': continue goo = row[1].value rowvals = [str(v.value) for v in row] records[sname][goo] = rowvals print(" " + " ".join(rowvals)) print() #print(json.dumps(records,indent=2)) teachers = defaultdict(list) teachers_bydept = defaultdict(set) alldepts = set() # reconfigure to key on goo by_goo = defaultdict(dict) for course,coursedict in records.items(): print(course) for goo,record in coursedict.items(): if goo=='ID': continue if record[0]=='Name': continue print(f" {goo}") try: if len(record)<3: by_goo[goo][course] = "ok" elif record[4]=="None": by_goo[goo][course] = "ok" else: by_goo[goo][course] = record[4] # record the end date except: print(f" -- problem with this record {json.dumps(record)}") by_goo[goo][course] = 'ok' bg_file = codecs.open('cache/gott_by_goo.json','w','utf-8') bg_file.write(json.dumps(by_goo,indent=2)) ## ## Start looking at the courses to cross reference ## for c in courses: print(c) try: goo = c[6] ## is this changing? 
c[8] crn = c[8] name = c[1] # full course name teacher = c[4] # last, first ctype = c[7] dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1) alldepts.add(dept1) d = list(c) #d.append(ctype) if not ctype: print(f"not finding mode for {name}") continue if ctype=='in-person': continue teachers[teacher].append(d) teachers_bydept[dept1].add(teacher) except: print(f"Problem with {c}, skipping") alldepts = list(alldepts) alldepts.sort() sheet = wb.create_sheet("New Summary") r = 1 deptfont = Font(bold=True) flagfont = PatternFill("solid", fgColor="00FFFFCC") for thedean in ['et','nl','vc','jn', 'de']: sheet.cell(row=r, column=1).value = dean_code_to_name[thedean] sheet.cell(row=r, column=1).font = deptfont r += 2 report.write(f"Dean: {dean_code_to_name[thedean]}\n") for D in alldepts: if not D in course_to_dean: print(f"MISSING DEAN for dept: {D}") if course_to_dean[D] == thedean: if len(teachers_bydept[D]) == 0: continue print(f"\n------------\n{D}") sheet.cell(row=r, column=1).value = D sheet.cell(row=r, column=1).font = deptfont r += 1 for t in teachers_bydept[D]: if t == 'STAFF, STAFF': continue waived = False sects = teachers[t] print(f"Sections for {t}: {sects}") goo = sects[0][6] course_mode = sects[0][7] print(t) sheet.cell(row=r, column=1).value = f"{t}" sheet.cell(row=r, column=2).value = f"{goo}" teacher_row = r r += 1 has_gott1 = goo in records['GOTT1'] has_gott2 = goo in records['GOTT2'] if has_gott1: sheet.cell(row=r, column=2).value = f"✓ GOTT 1 Trained" print(f" GOTT 1") r += 1 if goo in records['Other Certifications']: sheet.cell(row=r, column=2).value = f"✓ GOTT Waived - Outside Training" print(f" GOTT outside training") waived = True r += 1 if has_gott2: sheet.cell(row=r, column=2).value = f"✓ GOTT 2 Trained" print(f" GOTT 2") r += 1 if goo in records['POCR Reviewed']: sheet.cell(row=r, column=2).value = f"✓ POCR Reviewed" print(f" POCR") waived = True r += 1 if goo in records['TITLE V GOTT ACADEMY']: sheet.cell(row=r, column=2).value = f"✓ TITLE V GOTT ACADEMY 2014" print(f" GOTT Academy") waived = True r += 1 missing_requirements = [] if (not has_gott1) and (not waived): missing_requirements.append("GOTT 1") if (not has_gott2) and (not waived): missing_requirements.append("GOTT 2") if missing_requirements: sheet.cell(row=teacher_row, column=1).fill = flagfont for requirement in missing_requirements: sheet.cell(row=r, column=2).value = f"- MISSING {requirement}" report.write(f"\tMISSING {requirement}: {t} {goo}\n") r += 1 if goo in records['GOTT4']: sheet.cell(row=r, column=2).value = f"✓ GOTT 4 Trained" print(f" GOTT 4") r += 1 if goo in records['GOTT5']: sheet.cell(row=r, column=2).value = f"✓ GOTT 5 Trained" print(f" GOTT 5") r += 1 if goo in records['GOTT6']: sheet.cell(row=r, column=2).value = f"✓ GOTT 6 Trained" print(f" GOTT ") r += 1 if goo in records['SU21 Workshop']: sheet.cell(row=r, column=2).value = f"✓ SU21 Workshop" print(f" summer 21 workshop") r += 1 if goo in records['HUM.STEM']: sheet.cell(row=r, column=2).value = f"✓ Humanizing Stem" print(f" humanizing stem") r += 1 if goo in records['BOOT CAMP']: sheet.cell(row=r, column=2).value = f"✓ Boot Camp Self Paced" print(f" bc self paced") r += 1 if goo in records['GOTT ABC']: sheet.cell(row=r, column=2).value = f"✓ {records['GOTT ABC'][goo][2]} Self Paced" print(f" GOTT abc self paced") r += 1 for s in sects: sheet.cell(row=r, column=2).value = f"{s[3]}" sheet.cell(row=r, column=3).value = f"{s[1]}" r += 1 if missing_requirements: report.write(f"\t\t{s[3]}\t{s[1]}\t{s[7]}\n") if missing_requirements: 
report.write(f"\n") #for c in sheet.columns: # print(c) # print(f"{c} width: {sheet.column_dimensions[c].value}") sheet.column_dimensions['A'].width = 20 sheet.column_dimensions['B'].width = 30 sheet.column_dimensions['C'].width = 75 formatted_date = dt.now().strftime('%Y%m%d') wb.save(f"C:/Users/phowell/Downloads/GOTT_Completion_masterlist_{formatted_date}_summarized.xlsx") def cross_ref_training_withcsv(): from localcache2 import all_sem_courses_teachers from localcache import course_mode from localcache import sem_schedule gott1 = {} g1read = csv.reader(codecs.open('cache/GOTT_1.csv','r','utf-8')) i = 0 for row in g1read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott1[goo] = row gott2 = {} g2read = csv.reader(codecs.open('cache/GOTT_2.csv','r','utf-8')) i = 0 for row in g2read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott2[goo] = row gott4 = {} g4read = csv.reader(codecs.open('cache/GOTT_4.csv','r','utf-8')) i = 0 for row in g4read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott4[goo] = row gott5 = {} g5read = csv.reader(codecs.open('cache/GOTT_5.csv','r','utf-8')) i = 0 for row in g5read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott5[goo] = row gott6 = {} g6read = csv.reader(codecs.open('cache/GOTT_6.csv','r','utf-8')) i = 0 for row in g6read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott6[goo] = row gott9 = {} g9read = csv.reader(codecs.open('cache/GOTT_others.csv','r','utf-8')) i = 0 for row in g9read: if i == 0: headers = row #print(f"Headers: {headers}") i += 1 else: goo = row[1] gott9[goo] = row #report = codecs.open('cache/training_report.csv','w','utf-8') #out = csv.writer(report) #out.writerow(['crn','course','mode','teacher','goo','training','training_date']) #sections = sem_schedule('sp24') teachers = defaultdict(list) teachers_bydept = defaultdict(set) alldepts = set() courses = all_sem_courses_teachers() for c in courses: goo = c[7] crn = c[2].split(' ')[-1].split('/')[0] name = c[2] teacher = c[4] ctype = course_mode(crn,'sp24') dept1 = re.search(r'([A-Z]+)(\d+)',c[2].split(' ')[0]).group(1) alldepts.add(dept1) d = list(c) d.append(ctype) if ctype=='in-person': continue teachers[teacher].append(d) teachers_bydept[dept1].add(teacher) #print(f"{crn} {ctype} {name} ") #if goo in gott1: # out.writerow([c[1], c[2], ctype, c[4], goo, "GOTT 1", "/".join(gott1[goo][2:])]) # #else: # out.writerow([c[1], c[2], ctype, c[4], goo, "GOTT 1 MISSING", '']) alldepts = list(alldepts) alldepts.sort() for D in alldepts: print(f"\n------------\n{D}") for t in teachers_bydept[D]: if t == 'STAFF STAFF': continue sects = teachers[t] print(t) goo = sects[0][7] if goo in gott1: print(" + GOTT 1 Trained") else: print(" - MISSING GOTT 1") if goo in gott2: print(" + GOTT 2 Trained") if goo in gott4: print(" + GOTT 4 Trained") if goo in gott5: print(" + GOTT 5 Trained") if goo in gott6: print(" + GOTT 6 Trained") if goo in gott9: print(" + GOTT Waived - Outside Training") for s in sects: print(f" {s[8]} {s[2]}") print() def get_portfolios(id=0): if not id: id = int( input( "what user id? ")) p = fetch( f"{url}/api/v1/users/{id}/eportfolios" ) print(json.dumps(p, indent=2)) def get_port_pages(id=0): if not id: id = int( input("what portfolio id? 
")) p = fetch(f"{url}/api/v1/eportfolios/{id}/pages") print(json.dumps(p, indent=2)) def set_email_skip_confirm(): user_id = '76741' old_email = 'scalhoun@gavilan.edu' new_email = 'scalhoun@hartnell.edu' create_url = url + f'/api/v1/users/{user_id}/communication_channels' print(create_url) list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) for ch in list_channels: id = ch['id'] result = requests.delete(create_url+f"/{id}", headers=header) print(f"deleting id {id}") print(result.content) print('after deleting:') list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) print('\n\n') body = { 'communication_channel[address]' : old_email, 'communication_channel[type]' : 'email', 'skip_confirmation': True } response = requests.post(create_url, headers=header, data = body) print(response.json()) #confirm that the channel was created body = { 'communication_channel[address]' : new_email, 'communication_channel[type]' : 'email', 'skip_confirmation': True } response = requests.post(create_url, headers=header, data = body) print(response.json()) #confirm that the channel was created print('after creating:') list_channels = fetch(create_url) print(json.dumps(list_channels,indent=2)) print('\n\n') from html.parser import HTMLParser class HTMLStripper(HTMLParser): def __init__(self): super().__init__() self.reset() self.fed = [] def handle_data(self, d): self.fed.append(d) def get_text(self): return ' '.join(self.fed) def strip_html_and_truncate(html, length=25): if not html: return "" stripper = HTMLStripper() stripper.feed(html) text = stripper.get_text() text = ' '.join(text.split()) # collapse all whitespace text = re.sub(r'\n',' ', text) return text[:length] def summarize_submissions(submissions): summary = [] for sub in submissions: assignment = sub.get("assignment", {}) summary.append({ "submission": { "id": sub.get("id"), "excerpt": strip_html_and_truncate(sub.get("body", "")), "grade": sub.get("grade"), "submitted_at": sub.get("submitted_at"), "workflow_state": sub.get("workflow_state"), "missing": sub.get("missing", False), "late": sub.get("late", False) }, "assignment": { "id": assignment.get("id"), "name": assignment.get("name"), "excerpt": strip_html_and_truncate(assignment.get("description", "")), "due_at": assignment.get("due_at"), "is_quiz": assignment.get("is_quiz_assignment", False), "points_possible": assignment.get("points_possible") } }) return summary import pytz def format_assignments_results_table(results): def safe(val): return str(val) if val is not None else "-" def clip(text,length=40): return (text[:length] + "...") if text and len(text) > length+3 else (text or "") def to_pacific(iso): if not iso: return "-" utc = dt.strptime(iso, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc) pacific = utc.astimezone(pytz.timezone("US/Pacific")) return pacific.strftime("%Y-%m-%d %I:%M%p") # Sort by assignment due date (missing dates go last) def get_due_at(item): dt = item["assignment"].get("due_at") return dt.max if not dt else dt.strptime(dt, "%Y-%m-%dT%H:%M:%SZ") results = sorted(results, key=get_due_at) header = ( "| Type | Subm/Assmt ID | Due Date (PT) | Submitted (PT) | Grade/Points | Assignment Excerpt | Submission Excerpt |" ) sep = ( "|------------|---------------------|--------------------|----------------------|----------------------|-------------------------------|-------------------------------|" ) rows = [] for item in results: a = item["assignment"] s = item["submission"] kind = "quiz" if a.get("is_quiz") else "assignment" 
id_combo = f"{safe(s['id'])}/{safe(a['id'])}" due_pt = to_pacific(a.get("due_at")) submitted_pt = to_pacific(s.get("submitted_at")) grade = safe(s.get("grade")) points = safe(a.get("points_possible")) flags = [] if s.get("late"): flags.append("late") if s.get("missing"): flags.append("missing") gradepoints = f"{grade}/{points}" + (" " + ",".join(flags) if flags else "") row = ( f"| {kind:<10} | {id_combo:<19} | {due_pt:<18} | {submitted_pt:<20} | {gradepoints:<20} | {clip(a.get('name'),20) + ' - ' + clip(a.get('excerpt')):<49} | {clip(s.get('excerpt')):<29} |" ) rows.append(row) return '\n'.join([header, sep] + rows) def user_course_enrollment(user_id, course_id): user_url = f"{url}/api/v1/courses/{course_id}/enrollments" myparams = {"user_id": user_id, "type[]": "StudentEnrollment", "state[]": ['active','invited','deleted','rejected','completed','inactive']} return fetch(user_url, params=myparams) def get_student_course_assignments(student_id, course_id): submission_params = { "student_ids[]": f"{student_id}", "include[]": ["assignment"] } submissions_url = f"{url}/api/v1/courses/{course_id}/students/submissions" submissions = fetch(submissions_url, params=submission_params) summary = summarize_submissions(submissions) fmt = format_assignments_results_table(summary) return fmt def testme(): course_id = 22054 student_id = 63638 x = get_student_course_assignments(student_id, course_id) print(x) # print(json.dumps(x,indent=2)) #testme() #exit() def get_student_course_grades(student_id, course_id): results = {} # Course details (final grade) submission_params = { "student_ids[]": [f"user_{student_id}"], "include[]": ["assignment"] } user_url = f"{url}/api/v1/courses/{course_id}/enrollments" enrollments = user_course_enrollment(student_id, course_id) if not enrollments or 'errors' in enrollments: final_score = -1 final_grade = -1 else: final_score = enrollments[0].get("grades", {}).get("current_score", -1) final_grade = enrollments[0].get("grades", {}).get("current_grade", -1) # Assignment group mapping ag_url = f"{url}/api/v1/courses/{course_id}/assignment_groups" ag_list = fetch(ag_url) try: group_lookup = {ag["id"]: ag["name"] for ag in ag_list} except: print("groups lookup failed") # Submissions submissions_url = f"{url}/api/v1/courses/{course_id}/students/submissions" submissions = fetch(submissions_url, params=submission_params) assignments = [] for sub in submissions: try: assignment = sub.get("assignment", {}) group_id = assignment.get("assignment_group_id") group = group_lookup.get(group_id, "Uncategorized") assignments.append({ "assignment_name": assignment.get("name"), "category": group, "due_at": assignment.get("due_at"), "points_possible": assignment.get("points_possible"), "score": sub.get("score"), "submitted": sub.get("submitted_at") is not None, "graded": sub.get("graded_at") is not None or sub.get("score") is not None, "late": sub.get("late", False), "missing": sub.get("missing", False), "excused": sub.get("excused", False) }) except: print("failed to get assignment submissions") assignments = [] results = { #"course_code": course_name, "final_score": final_score, "final_grade": final_grade, "assignments": assignments } return results def summarize_student_logs(id=0): import pandas as pd import re import json from collections import defaultdict from localcache2 import course_from_id if id==0: id = input("student id> ") # Load CSV df = pd.read_csv(f"cache/users/logs/{id}.csv", parse_dates=["updated_at"]) # Extract course_id from URL df["course_id"] = 
df["url"].str.extract(r"/courses/(\d+)") df = df.dropna(subset=["course_id"]) df["course_id"] = df["course_id"].astype(int) # Convert 'participated' to boolean (robustly) df["participated"] = df["participated"].astype(str).str.lower() == "true" # Truncate to date only df["date"] = df["updated_at"].dt.date # Group by course course_summaries = {} for course_id, group in df.groupby("course_id"): course_data = {} try: this_course = course_from_id(course_id) course_data['course_code'] = this_course['course_code'] except Exception as e: print(f"didn't find course {course_id}, {e}") course_data["first_seen"] = str(group["date"].min()) course_data["last_seen"] = str(group["date"].max()) # Histogram of daily hits daily_counts = group["date"].value_counts().sort_index() course_data["daily_hits"] = {str(k): int(v) for k, v in daily_counts.items()} # Participation count course_data["participated_count"] = int(group["participated"].sum()) course_summaries[str(course_id)] = course_data course_summaries[str(course_id)]['grades'] = get_student_course_grades(id, course_id) # App name tally app_name_counts = df["app_name"].fillna("None").value_counts().to_dict() # Final output output = { "courses": course_summaries, "app_name_counts": app_name_counts } # Write to JSON file with open(f"cache/users/logs/{id}_summary.json", "w") as f: json.dump(output, f, indent=2) print(f"Done. Output written to cache/users/logs/{id}_summary.json") def readable_user_summary(): import re import ast import math from datetime import timedelta from zoneinfo import ZoneInfo import pandas as pd COURSE_RE = re.compile(r"/courses/(\d+)(?:/|$)") def extract_course_id(url, links_str) : """Return Canvas course id from URL (/courses/) or from links.context.""" if url: m = COURSE_RE.search(url) if m: return int(m.group(1)) if links_str: try: d = ast.literal_eval(links_str) ctx = d.get("context") if isinstance(ctx, int): return ctx except Exception: pass return None id = input("user id? ") csv_path = f"cache/users/logs/{id}.csv" gap_minutes = 10 tz_name = "America/Los_Angeles" # Load df = pd.read_csv(csv_path) # Extract course_id df["course_id"] = [ extract_course_id(u, l) for u, l in zip(df.get("url"), df.get("links")) ] # Keep only rows with a course_id df = df.dropna(subset=["course_id"]).copy() df["course_id"] = df["course_id"].astype(int) # Parse times (UTC) and convert to local Pacific time (user asked for “Pacific standard time”; # we’ll convert to America/Los_Angeles which accounts for DST automatically). 
df["ts_utc"] = pd.to_datetime(df["created_at"], utc=True, errors="coerce") df = df.dropna(subset=["ts_utc"]).copy() local_tz = ZoneInfo(tz_name) df["ts_local"] = df["ts_utc"].dt.tz_convert(local_tz) # Sort for gap detection df = df.sort_values(["course_id", "session_id", "ts_local"]) # Session splitting: new session if gap > gap_minutes within same course+session_id gap = pd.Timedelta(minutes=gap_minutes) # Identify gap starts within each course+session_id stream df["gap_new"] = ( df.groupby(["course_id", "session_id"])["ts_local"] .diff() .gt(gap) .fillna(True) # first row starts a session ) # Session index within each course+session_id df["session_idx"] = df.groupby(["course_id", "session_id"])["gap_new"].cumsum() # Session key across all data df["session_key"] = list(zip(df["course_id"], df["session_id"], df["session_idx"])) # Aggregate to sessions agg = ( df.groupby("session_key") .agg(course_id=("course_id", "first"), start=("ts_local", "min"), end=("ts_local", "max"), hits=("ts_local", "size")) .reset_index(drop=True) ) # Duration in minutes (ceil; minimum 1 minute) dur_secs = (agg["end"] - agg["start"]).dt.total_seconds().clip(lower=0) agg["mins"] = [max(1, math.ceil(s / 60)) for s in dur_secs] # Group by local calendar day for reporting agg["day"] = agg["start"].dt.strftime("%b %-d") # e.g., "Oct 27" # Format start time like "5:05pm" agg["start_str"] = agg["start"].dt.strftime("%-I:%M%p").str.lower() # Order: day, then chronological within day agg = agg.sort_values(["start"]) # Group by calendar date only agg["day_key"] = agg["start"].dt.date # Build text report with nice formatting lines: list[str] = [] for day_key, day_df in agg.groupby("day_key", sort=False): # Pretty date for the section header # Using the first session time for that day to include time first_time = day_df["start"].min() # linux # pretty_day = first_time.strftime("%b %-d %-I:%M%p").lower() # e.g., "Sep 26 8:02pm" # windows pretty_day = first_time.strftime("%b %#d %#I:%M%p").lower() lines.append(f"{pretty_day}:") for _, row in day_df.iterrows(): lines.append( f" - course {row.course_id}, {row.start_str}: " f"{row.hits} hits over {row.mins} minutes" ) report_out = codecs.open(f"cache/users/logs/{id}_report.txt", "w", "utf-8") report_out.write( "\n".join(lines) ) return "\n".join(lines) if lines else "No course activity found." 
# Example usage:
#   print(generate_course_report("canvas_logs.csv", gap_minutes=10))

if __name__ == "__main__":
    print("")

    options = {
        1:  ['Fetch iLearn users with @gavilan.edu email address', teacherRolesUpdateCache],
        2:  ['Fetch all users', fetchAllUsers],
        5:  ['Download user avatars', downloadPhoto],
        6:  ['Merge photo folders', mergePhotoFolders],
        7:  ['Get all teachers logs 1 month', get_recent_views],
        8:  ['Gather teacher history, a variety of stats.', getTeachersInfoMain],
        9:  ['test rtr.', read_training_records],
        10: ["Get a user's logs", track_user],
        11: ["Tally a user's logs from csv file", count_courses],
        #11: ['test: oneYearSchedule', oneYearSchedule],
        12: ['summarize hit activity', activity_summary],
        13: ['Get all users logs in a class', track_users_in_class],
        14: ['Get logs for 1 user', track_a_user],
        15: ['Get all users logs in a semester', track_users_in_sem],
        16: ['Report on attendance for all classes', track_users_by_teacherclass],
        17: ['Show all convos for a user', convos_for_user],
        27: ['Show all pushed notifications for a user', comm_mssgs_for_user],
        18: ['Quiz submissions', quiz_submissions],
        #19: ['NLP Sample', nlp_sample],
        20: ['Enroll a single user into a class', one_course_enrol],
        21: ['Enroll a student into a section', section_enroll],
        22: ['Teachers new this semester', find_new_teachers],
        #22: ['Sync personnel and conference user databases', user_db_sync],
        23: ['Find non-gnumbers', find_no_goo],
        24: ['compare user tables', compare_db_tables],
        25: ['cross ref training', cross_ref_training],
        26: ['find goo numbers in training spreadsheet', training_find_goos],
        30: ['get portfolios for user id', get_portfolios],
        31: ['get portfolio pages for portfolio id', get_port_pages],
        40: ['reset user email without confirmation', set_email_skip_confirm],
        41: ["summarize user's logs", summarize_student_logs],
        50: ["summarize user's logs 2", readable_user_summary],
        #3: ['Main index, 1 year, teachers and their classes', getAllTeachersInTerm],
        #5: ['Match names in schedule & ilearn', match_usernames],
        #6: ['Create Dept\'s ZTC list', create_ztc_list],
        ##7: ['Build and send ZTC emails', send_ztc_mails],
        #14: ['investigate the logs', investigate_logs],
        #12: ['test: match_usernames', match_usernames],
        #13: ['test: get all names', getAllNames],
        #13: ['x', users_with_gavilan_email],
    }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')

    # Call the selected function from the options dict
    options[int(resp)][1]()
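# Usage note (the file name below is an assumption; substitute the actual module name):
#
#   python users.py          # interactive: prints the numbered menu and asks "Choose: "
#   python users.py 25       # runs option 25 ("cross ref training") without prompting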