# canvasapp/aws.py

import datetime, pysftp, codecs, re, time, json, traceback, csv, os
import pandas as pd
import boto3
from botocore.exceptions import ClientError
from deepdiff import DeepDiff
from io import StringIO
from time import strptime
from dateutil import parser
from bs4 import BeautifulSoup as bs
from datetime import datetime as dt
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from canvas_secrets import access_key, access_secret
from canvas_secrets import FTP_SITE, FTP_USER, FTP_PW
# TODO: move these lookup tables out to a config file

# Department -> 'gp' group label (used in enrollment_<sem>.csv)
gp = {
    'ACCT': 'info', 'AE': 'skill', 'AH': 'well', 'AJ': 'skill', 'AMT': 'skill',
    'ANTH': 'soc', 'APE': 'skill', 'ART': 'art', 'ASTR': 'stem', 'ATH': 'well',
    'BIO': 'stem', 'BOT': 'info', 'BUS': 'info', 'CD': 'skill', 'CHEM': 'stem',
    'CMGT': 'skill', 'CMUN': 'comm', 'COS': 'skill', 'CSIS': 'stem', 'CWE': 'skill',
    'DM': 'info', 'ECOL': 'stem', 'ECON': 'info', 'ENGL': 'soc', 'ENGR': 'stem',
    'ENVS': 'stem', 'ESL': 'comm', 'ETHN': 'comm', 'FRNH': 'comm', 'GEOG': 'stem',
    'GEOL': 'stem', 'GUID': 'soc', 'HE': 'well', 'HIST': 'soc', 'HUM': 'soc',
    'HVAC': 'skill', 'JFT': 'skill', 'JLE': 'skill', 'JOUR': 'comm', 'JPN': 'comm',
    'KIN': 'well', 'LIB': 'comm', 'LIFE': 'well', 'MATH': 'stem', 'MCTV': 'art',
    'MUS': 'art', 'PHIL': 'soc', 'PHYS': 'stem', 'POLS': 'soc', 'PSCI': 'stem',
    'PSYC': 'soc', 'RE': 'skill', 'SJS': 'soc', 'SOC': 'soc', 'SPAN': 'comm',
    'THEA': 'art', 'WELD': 'skill', 'WTRM': 'skill', 'MGMT': 'skill', 'MKTG': 'skill',
    'HTM': 'skill',
}

# Department -> dean (initials)
dean = {
    'AH': 'et', 'HE': 'et', 'ATH': 'et', 'KIN': 'et', 'LIFE': 'et',
    'AE': 'ss', 'APE': 'ss', 'ACCT': 'ss', 'AJ': 'ss', 'AMT': 'ss',
    'HVAC': 'ss', 'JFT': 'ss', 'JLE': 'ss', 'RE': 'ss', 'WTRM': 'ss',
    'WELD': 'ss', 'ANTH': 'nl', 'ART': 'nl', 'ASTR': 'jn', 'BIO': 'jn',
    'BOT': 'ss', 'BUS': 'ss', 'CD': 'ss', 'CHEM': 'jn', 'CMGT': 'ss',
    'CMUN': 'nl', 'COS': 'ss', 'CSIS': 'ss', 'CWE': 'ss', 'DM': 'ss',
    'ECOL': 'jn', 'ECON': 'ss', 'ENGL': 'nl', 'ENGR': 'jn', 'ENVS': 'jn',
    'ESL': 'ss', 'ETHN': 'nl', 'FRNH': 'nl', 'GEOG': 'jn', 'GEOL': 'jn',
    'GUID': 'de', 'HIST': 'nl', 'HUM': 'nl', 'JOUR': 'nl', 'JPN': 'nl',
    'LIB': 'jn', 'MATH': 'jn', 'MCTV': 'nl', 'MGMT': 'ss', 'MKTG': 'ss',
    'HTM': 'ss', 'MUS': 'nl', 'PHIL': 'nl', 'PHYS': 'jn', 'POLS': 'nl',
    'PSCI': 'jn', 'PSYC': 'nl', 'PSYCH': 'nl', 'SJS': 'nl', 'SOC': 'nl',
    'SPAN': 'nl', 'THEA': 'nl',
}

# Dean initials -> full name
dean_names = {
    'et': 'Enna Trevathan',
    'ss': 'Susan Sweeney',
    'nl': 'Noah Lystrup',
    'jn': 'Jennifer Nari',
    'de': 'Diego Espinoza',
}

def get_secret():
    secret_name = "gav/goo/cred"
    region_name = "us-west-1"
    # Create a Secrets Manager client
    session = boto3.session.Session(
        aws_access_key_id=access_key,
        aws_secret_access_key=access_secret
    )
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError as e:
        # For a list of exceptions thrown, see
        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
        raise e
    secret = json.loads(get_secret_value_response['SecretString'])
    return (secret['user'], secret['password'])

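# A minimal sketch of the secret payload get_secret() expects (the key names
# 'user' and 'password' come from the code above; the values are made up):
#   {"user": "portal-login-name", "password": "portal-password"}
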
DEBUG = 0
GOO, GOO_PIN = get_secret()

def d(s, end=''):
    # debug print helper: prints only when DEBUG is truthy
    global DEBUG
    if end and DEBUG: print(s, end=end)
    elif DEBUG: print(s)

def clean_funny(s):
    # Banner renders blank cells as the Unicode replacement character
    # (U+FFFD, utf-8 bytes EF BF BD); treat those as empty. The original
    # compared encoded bytes to a str, which is always False in Python 3.
    if s and s == '\ufffd': return ''
    return s

def clean_funny2(s):
    if s and s == '\xa0': return ''
    if s and s == '\ufffd': return ''
    return s

def row_has_data(r):  # helper
    if r.find_all('th'):
        return False
    if len(r.find_all('td')) > 2:
        return True
    if re.search(r'Note:', r.get_text()):
        return True
    return False

def time_to_partofday(t):
    # todo: account for multiple sites/rows
    # input looks like "11:20 am-12:10 pm"
    mor = strptime('12:00 PM', '%I:%M %p')
    mid = strptime('2:00 PM', '%I:%M %p')
    aft = strptime('6:00 PM', '%I:%M %p')
    if t == 'TBA':
        return 'TBA'
    t = t.upper()
    parts = t.split('-')
    try:
        begin = strptime(parts[0], '%I:%M %p')
        end = strptime(parts[1], '%I:%M %p')
        if end > aft:
            return "Evening"
        if end > mid:
            return "Afternoon"
        if end > mor:
            return "Midday"
        return "Morning"
    except Exception:
        return ""

# Deduce a 'site' field, based on room name and known offsite locations
def room_to_site(room, verbose=0):
    # todo: account for multiple sites/rows
    # todo: better way to store these offsite labels
    othersites = 'AV,SBHS I-243,SBHS I-244,LOADCS,HOPEH,HOPEG,PLY,SAS,SBHS,LOHS,CHS,SBRAT,'.split(',')
    # is it gilroy, mh, hol, other, online or hybrid?
    site = 'Gilroy'
    if room in othersites:
        site = "Other"
    if room == 'TBA':
        site = 'TBA'
    if room == 'AV':
        site = 'San Martin Airport'
    if re.search('MHG', room):
        site = 'Morgan Hill'
    if re.search('HOL', room):
        site = 'Hollister'
    if re.search('COY', room):
        site = 'Coyote Valley'
    if re.search('OFFSTE', room):
        site = 'Other'
    if re.search('ONLINE', room):
        site = 'Online'
    if verbose: print(room, '\t', end=' ')
    return site

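# For example (hypothetical room codes matching the patterns tested above):
#   room_to_site('MHG 123') -> 'Morgan Hill'
#   room_to_site('HOL 5')   -> 'Hollister'
#   room_to_site('ONLINE')  -> 'Online'
#   room_to_site('PB 11')   -> 'Gilroy' (the default)
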
# Take Banner's html and make CSV text
def ssb_to_csv(src):
    output = 'crn,code,sec,cmp,cred,name,days,time,cap,act,rem,wl_cap,wl_act,wl_rem,teacher,date,loc,ztc,note\n'
    b = bs(src, 'html.parser')
    tab = b.find(class_="datadisplaytable")
    if not tab:
        print("hmm... didn't find a 'datadisplaytable' in this html")
        return 0
    rows = tab.find_all('tr')
    drows = list(filter(row_has_data, rows))
    for dd in drows:
        output += row_text(dd)
    return output

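# The returned value is CSV text: the header line above, one row per section
# meeting (built by row_text below), and quoted "Note:" rows that land in the
# final column.
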
def row_text(r):  # helper
    d("Row Txt Fxn gets: ")
    arr = []
    for t in r.find_all('td'):
        if t.contents and len(t.contents) and t.contents[0].name == 'img':
            arr.append("1")
            d("img")
        r_text = t.get_text()
        arr.append(r_text)
        if 'colspan' in t.attrs and t['colspan'] == '2':
            d('[colspan2]')
            arr.append('')
        d("\t" + r_text, end=" ")
    d('')
    if len(arr) == 1 and re.search(r'Note:', arr[0]):
        note_line = clean_funny(arr[0])
        note_line = re.sub(r'\n', ' ', note_line)
        note_line = re.sub(r'"', '', note_line)
        return ',,,,,,,,,,,,,,,,,,"' + note_line + '"\n'
    del arr[0]
    arr[1] = clean_funny(arr[1])
    arr[2] = clean_funny(arr[2])
    if arr[1]: arr[1] = arr[1] + " " + arr[2]
    del arr[2]
    arr = [re.sub(r'&nbsp;', '', a) for a in arr]
    arr = [re.sub(',', '. ', a) for a in arr]
    arr = [re.sub(r'\(P\)', '', a) for a in arr]
    arr = [a.strip() for a in arr]
    r = ','.join(arr) + '\n'
    r = re.sub('\n', '', r)
    r = re.sub('add to worksheet', '', r)
    d("Row Txt Fxn returns: " + r + "\n\n")
    return r + '\n'

# take text lines and condense them to one dict per section
def to_section_list(input_text, verbose=0):
    this_course = ''
    all_courses = []
    try:
        f = StringIO(input_text)
    except Exception:
        print("ERROR with this input_text:")
        print(input_text)
        raise  # without a reader there is nothing to parse
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    for row in reader:
        rowdict = dict(zip(headers, row))  # renamed: 'd' would shadow the debug helper
        # clean funny unicode char in blank entries
        r = {k: clean_funny2(v) for k, v in rowdict.items()}
        if verbose: print("Cleaned: " + str(r))
        if 'time' in r:
            if r['time'] == 'TBA': r['time'] = ''
            if r['time']: r['partofday'] = time_to_partofday(r['time'])
        r['type'] = ''
        if 'loc' in r:
            if r['loc'] == 'ONLINE': r['type'] = 'online'
            if r['loc'] == 'ONLINE' and r['time']: r['type'] = 'online live'
            if r['loc'] == 'ONLINE LIVE': r['type'] = 'online live'
            if r['loc']: r['site'] = room_to_site(r['loc'], verbose)
        if 'code' in r:
            if re.search(r'ONLINE\sLIVE', r['code']):
                r['type'] = 'online live'
            elif re.search(r'ONLINE', r['code']):
                r['type'] = 'online'
        if r['crn']:  # a crn means a new section; blank means a continuation row
            if verbose: print(" it's a new section.")
            if this_course:
                if not this_course['extra']: this_course.pop('extra', None)
                all_courses.append(this_course)
            this_course = r
            this_course['extra'] = []
        else:
            # a continuation line: keep only its non-empty fields
            if verbose: print(" additional meeting: " + str(r))
            for k, v in list(r.items()):
                if not v: r.pop(k, None)
            # TODO: if the extra line is a different type (hybrid detection)?
            this_course['extra'].append(r)
    # append the final section too; the loop above only flushes on the *next* crn
    if this_course:
        if not this_course['extra']: this_course.pop('extra', None)
        all_courses.append(this_course)
    return all_courses

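# Each section dict carries the csv columns plus the derived keys added above
# ('partofday', 'type', 'site') and an 'extra' list of continuation rows,
# roughly like this (all values illustrative):
#   {'crn': '40123', 'code': 'MATH 1A', 'teacher': 'Smith. J',
#    'site': 'Gilroy', 'type': '', 'partofday': 'Morning', 'extra': [...]}
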
# Schedule / course filling history
# csv columns: timestamp, crn, code, teacher, cap, act, wl_cap, wl_act
# Log the history of enrollments per course during registration
def log_section_filling(current_sched_list):
    cols = 'crn code teacher cap act wl_cap wl_act'.split(' ')
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    csv_fn = 'cache/reg_history_' + short_sem + '.csv'
    with codecs.open(csv_fn, 'a', 'utf-8') as f:
        writer = csv.writer(f)
        for S in current_sched_list:
            writer.writerow([now] + [S[X] for X in cols])

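# Each run appends one row per section, e.g. (illustrative values):
#   2024-07-01T09-00,40123,MATH 1A,Smith. J,30,28,5,0
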
# Same as above, but compressed: one column of 'act' counts per snapshot
def log_section_filling2(current_sched_list):
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H')
    todays_data = {int(S['crn']): S['act'] for S in current_sched_list}
    todays_df = pd.DataFrame.from_dict(todays_data, orient='index', columns=[now])
    todays_df = todays_df.rename_axis('crn')
    todays_df.to_csv('cache/reg_today_new.csv', index=True)
    try:
        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
        print(myframe)
    except Exception:
        print("Creating new data file for this semester.")
        with open('cache/reg_data_' + short_sem + '.csv', 'w') as fff:
            fff.write('crn\n')
        myframe = pd.read_csv('cache/reg_data_' + short_sem + '.csv')
    new_df = myframe.join(todays_df, on='crn', how='outer')
    new_df = new_df.rename_axis('crn')
    print(new_df)
    reg_data_filename = 'reg_data_' + short_sem + '.csv'
    new_df.to_csv('cache/' + reg_data_filename, index=False)
    put_file('/home/public/schedule/', 'cache/', reg_data_filename, 0)

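# reg_data_<sem>.csv grows one column per run: one row per CRN, one 'act'
# column per hourly snapshot, e.g. (illustrative):
#   crn,2024-07-01T09,2024-07-02T09
#   40123,18,21
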
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term):
    show_summary = 1
    the_year = '20' + term[2:4]
    print("year: ", the_year, " semester: ", term)
    term_in = "cache/%s_sched.json" % term
    term_out = "cache/%s_latestarts.txt" % term
    expanded_out = "%s_sched_expanded.json" % term
    print("Writing output to " + term_out)
    infile = codecs.open(term_in, "r", "utf-8")
    outfile = codecs.open(term_out, "w", "utf-8")
    exoutfile = codecs.open('cache/' + expanded_out, "w", "utf-8")
    expanded = []
    sched = json.loads(infile.read())
    by_date = {}
    if show_summary: print("course \t loc \t type \t time")
    for C in sched:
        if (not C['type']) and C['loc'] != 'ONLINE':
            C['type'] = 'in-person'
        if show_summary: print("%s \t %s \t %s \t %s" % (C['code'], C['loc'], C['type'], C['time']))
        if 'extra' in C:
            if 'partofday' in C and ('type' in C['extra'][0]) and (C['extra'][0]['type'] == 'online') and C['loc'] != "ONLINE LIVE":
                C['type'] = 'hybrid'
        times = C['time'].split("-")
        if len(times) > 1:
            time_start = times[0]
            time_end = times[1]
            try:
                startt = time.strptime(time_start, "%I:%M %p")
                endt = time.strptime(time_end, "%I:%M %p")
                min_start = "00" if startt.tm_min == 0 else str(startt.tm_min)
                min_end = "00" if endt.tm_min == 0 else str(endt.tm_min)
                C['time_start'] = "%i:%s" % (startt.tm_hour, min_start)
                C['time_end'] = "%i:%s" % (endt.tm_hour, min_end)
            except Exception as e:
                print(e, "\n-- problem parsing time ", time_start, " or ", time_end)
        else:
            C['time_start'] = ''
            C['time_end'] = ''
        if re.search('TBA', C['date']):
            C['start'] = ''
            C['end'] = ''
            C['doy'] = ''
            expanded.append(C)
            continue
        parts = C['date'].split("-")
        start = parts[0] + "/" + the_year
        end = parts[1] + "/" + the_year
        try:
            startd = parser.parse(start)
            endd = parser.parse(end)
            C['start'] = "%i-%i" % (startd.month, startd.day)
            C['end'] = "%i-%i" % (endd.month, endd.day)
            C['doy'] = startd.timetuple().tm_yday
            expanded.append(C)
        except Exception as e:
            print(e, "\n-- problem parsing ", start, " or ", end)
            continue  # startd is unusable here, so skip the by_date bucket
        if startd not in by_date:
            by_date[startd] = []
        by_date[startd].append(C)
    exoutfile.write(json.dumps(expanded, indent=2))
    exoutfile.close()
    put_file('/home/public/schedule/', 'cache/', expanded_out, 0)
    for X in sorted(by_date.keys()):
        if len(by_date[X]) < 200:
            prettydate = X.strftime("%A, %B %d")
            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
            for Y in by_date[X]:
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
    outfile.close()
    put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
    return expanded

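# The latestarts report groups sections by start date, e.g. (illustrative):
#   Monday, September 16: 2 courses
#       MATH 1A 40123   Smith. J    in-person
#       ART 2B 40456    Jones. K    online
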
# Use Firefox to log in to SSB and fetch the full schedule. Only works where selenium is installed.
def scrape_schedule():
    url = "https://lum-prod.ec.gavilan.edu/"
    text = ''
    try:
        driver = webdriver.Firefox()
        driver.get(url)
        driver.implicitly_wait(15)
        # enter credentials for the mygav login
        driver.find_element("id", "usernameUserInput").clear()
        driver.find_element("id", "usernameUserInput").send_keys(GOO)
        driver.find_element("name", "password").send_keys(GOO_PIN)
        driver.find_element("xpath", "/html/body/div[1]/div/div/div/form/div[3]/div/button").click()
        driver.implicitly_wait(10)
        # click the student tab
        print(driver.title)
        driver.find_element("xpath", "/html/body/div[2]/nav/div/div[1]/ul/li[3]/a/span").click()
        driver.implicitly_wait(10)
        # Setup wait for later
        wait = WebDriverWait(driver, 10)
        original_window = driver.current_window_handle
        # click ssb button. Opens a new tab
        driver.implicitly_wait(10)
        driver.find_element("xpath", "/html/body/div[2]/div/div[3]/div/div[1]/div/div[1]/section/div/div/div/div[1]/div/div/div/h5/a").click()
        driver.implicitly_wait(20)
        # Wait for the new window or tab, then switch to it
        wait.until(EC.number_of_windows_to_be(2))
        for window_handle in driver.window_handles:
            if window_handle != original_window:
                driver.switch_to.window(window_handle)
                break
        print(driver.title)
        driver.find_element(By.LINK_TEXT, "Students").click()
        driver.implicitly_wait(20)
        print(driver.title)
        driver.find_element(By.LINK_TEXT, "Registration").click()
        driver.implicitly_wait(10)
        print(driver.title)
        driver.find_element(By.LINK_TEXT, "Search for Classes").click()
        driver.implicitly_wait(15)
        print(driver.title)
        dd = Select(driver.find_element("name", "p_term"))
        if dd:
            dd.select_by_visible_text(SEMESTER)
        driver.find_element("xpath", "/html/body/div/div[4]/form").submit()
        driver.implicitly_wait(15)
        print(driver.title)
        driver.find_element("xpath", "/html/body/div/div[4]/form/input[18]").click()
        driver.implicitly_wait(10)
        print(driver.title)
        driver.find_element("name", "SUB_BTN").click()
        driver.implicitly_wait(40)
        time.sleep(15)
        driver.implicitly_wait(40)
        print(driver.title)
        text = driver.page_source
        driver.quit()
    except Exception as e:
        print("Got an exception: ", e)
        print("There was an error: " + str(e) + ". The line where the code failed was " + str(traceback.extract_stack()[-1][1]))
        return
    codecs.open('cache/' + filename_html, 'w', 'utf-8').write(text)
    as_list = ssb_to_csv(text)
    as_dict = to_section_list(as_list)
    jj = json.dumps(as_dict, indent=2)
    # Diff against the previous run and log enrollment counts
    try:
        print("Opening " + 'cache/' + filename)
        ps = codecs.open('cache/' + filename, 'r', 'utf-8')
        prev_sched = json.loads(ps.read())
        ps.close()
        print("ok")
        if 1:  # sometimes I want to re-run this without affecting the logs
            log_section_filling(as_dict)
            log_section_filling2(as_dict)
        dd = DeepDiff(prev_sched, as_dict, ignore_order=True)
        print("diff done")
        pretty_json = json.dumps(json.loads(dd.to_json()), indent=2)
        codecs.open('cache/%s_sched_diff.json' % short_sem, 'w', 'utf-8').write(pretty_json)
    except Exception as e:
        print(e)
        print("Can't do diff?")
    # Next, rename the prev sched_xxYY.json data file to have its date,
    # make this new one, and then upload it to the website.
    # Maybe even count the entries and do a little sanity checking.
    try:
        import pathlib
        prev_stat = pathlib.Path('cache/' + filename).stat()
        mtime = dt.fromtimestamp(prev_stat.st_mtime)
        print(mtime)
    except Exception:
        print("Couldn't stat the previous schedule file.")
    codecs.open('cache/' + filename, 'w', 'utf-8').write(jj)
    put_file('/home/public/schedule/', 'cache/', filename, 0)  # /gavilan.edu/_files/php/
    return as_dict

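# One scrape_schedule() run leaves behind, in cache/: the raw page
# (<sem>_sched.html), the parsed schedule (<sem>_sched.json), a diff against
# the previous run (<sem>_sched_diff.json), and the enrollment logs written by
# log_section_filling / log_section_filling2.
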
def scrape_schedule_multi():
    global SEMESTER, short_sem, semester_begin, filename, filename_html, sem_code
    SEMESTER = 'Fall 2024'
    sem_code = '202470'
    short_sem = 'fa24'
    semester_begin = strptime('08/26', '%m/%d')
    filename = 'fa24_sched.json'
    filename_html = 'fa24_sched.html'
    as_dict = scrape_schedule()
    expanded = list_latestarts(short_sem)
    fields = "gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type".split(",")
    with codecs.open('cache/enrollment_%s.csv' % short_sem, 'w', 'utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        for S in expanded:
            parts = S['code'].split(' ')
            S['dept'] = parts[0]
            S['num'] = parts[1]
            S['gp'] = gp[parts[0]]
            S['dean'] = dean[parts[0]]
            S['sem'] = short_sem
            if S['loc'] == "ONLINE LIVE": S['site'] = 'OnlineLive'
            csvwriter.writerow([S[x] for x in fields])
    put_file('/home/public/schedule/', 'cache/', 'enrollment_%s.csv' % short_sem, 0)

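# enrollment_<sem>.csv pairs each section with its pathway group and dean,
# e.g. (illustrative row under the real header):
#   gp,dean,dept,num,code,crn,teacher,name,act,cap,site,type
#   stem,jn,MATH,1A,MATH 1A,40123,Smith. J,Calculus,28,30,Gilroy,in-person
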
################
################ SENDING DATA AWAY
################
################
################
# Upload a file from cache/ to the web host over SFTP
def put_file(remotepath, localpath, localfile, prompt=1):
    show_all = 0
    folder = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None  # NB: disables host-key verification
    with pysftp.Connection(FTP_SITE, username=FTP_USER, password=FTP_PW, cnopts=cnopts) as sftp:
        # todo: these paths
        sftp.chdir(remotepath)
        files = sftp.listdir()
        if show_all: print(folder + "\tI see these files on remote: ", files, "\n")
        localf = os.listdir(localpath)
        if show_all: print("I see these local: ", localf)
        if prompt:
            input('ready to upload')
        sftp.put(localpath + localfile, localfile, preserve_mtime=True)
    print("Uploaded %s" % localfile)

if __name__ == '__main__':
    scrape_schedule_multi()