diff --git a/pipelines.py b/pipelines.py
index e4104a2..cfe930f 100644
--- a/pipelines.py
+++ b/pipelines.py
@@ -1953,12 +1953,12 @@ def scrape_for_db():
 
 
 def argos_data():
     global dean,gp
-    f2 = codecs.open('cache/enrollment_sp23.csv','w','utf-8')
+    f2 = codecs.open('cache/enrollment_argos_fa23.csv','w','utf-8')
     writer = csv.writer(f2)
     headers = 'gp dean dept num code crn name act site'.split(' ')
     writer.writerow(headers)
-    f = codecs.open('cache/sched_draft_sp23.csv','r','utf-8')
+    f = codecs.open('cache/sched_draft_fa23.csv','r','utf-8')
     reader = csv.reader(f, delimiter=',')
     headers = next(reader)
     for r in reader:
@@ -1979,6 +1979,87 @@ def argos_data():
             print(site)
             writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
 
+def days_times(s):
+    parts = re.search(r'^([MTWThRF]+)\s?(.*?)$',s)
+    if parts:
+        day = parts.group(1)
+        time = parts.group(2)
+        parts2 = re.search(r'^(.*)\s?-\s?(.*)$',time)
+        if parts2:
+            time_start = parts2.group(1).strip()
+            time_end = parts2.group(2).strip()
+            return day, time_start, time_end
+        return day, time, ''
+    return '','',''
+
+def remove_year(s):
+    s = re.sub(r'\-', '/', s)
+    if len(s)>5: return s[5:]
+    return s
+
+def argos_data_from_cvc():
+    global dean,gp
+    short_sem = 'su23'
+
+    f3 = codecs.open('cache/%s_sched.json' % short_sem, 'w', 'utf-8')
+    all_courses = []
+
+    f = codecs.open('cache/sched_draft_%s.csv' % short_sem, 'r','utf-8')
+    reader = csv.reader(f, delimiter=',')
+    headers = next(reader)
+    for r in reader:
+        d = dict(list(zip(headers,r)))
+        #print(d)
+        parts = re.search(r'^([A-Z]+)(\d+[A-Z]*)$', d['Course_Code'])
+        if parts:
+            dept = parts.group(1)
+            num = parts.group(2)
+            my_dean = dean[dept]
+            my_gp = gp[dept]
+            code = dept + " " + num
+            crn = d['CRN']
+            cred = d['Units_Credit_hours']
+            days, time_start, time_end = days_times(d['Meeting_Days_and_Times'])
+            times = ""
+            if time_start: times = time_start + "-" + time_end
+            date = remove_year(d['Start_Date']) + "-" + remove_year(d['End_Date'])
+            start = remove_year(d['Start_Date'])
+            end = remove_year(d['End_Date'])
+            ztc = d['ZTC']
+            name = d['Course_Name']
+            cap = d['Class_Capacity']
+            rem = d['Available_Seats']
+            act = int(cap) - int(rem)
+            teacher = d['Instructor_First_Name'] + " " + d['Instructor_Last_Name']
+            delivery = d['Delivery']
+            if delivery == "Online":
+                if days:
+                    site = "Online"
+                    type = "online live"
+                    loc = "Online Live"
+                else:
+                    site = "Online"
+                    type = "online"
+                    loc = "ONLINE"
+            elif delivery == "Hybrid":
+                site = d['Campus_College']
+                type = "hybrid"
+                loc = d['Meeting_Locations']
+            else:
+                site = d['Campus_College']
+                type = "in-person"
+                loc = d['Meeting_Locations']
+            this_course = { "crn": crn, "dept": dept, "num": num, "code": code, "name": name, "teacher": teacher, "type": type, "loc": loc, \
+                "cap": cap.strip(), "act": act, "site": site, "date": date, "cred": cred.strip(), "ztc": ztc, "days": days, "time": times, \
+                "start": start, "end": end, "time_start": time_start, "time_end": time_end, "dean": my_dean, "gp": my_gp}
+            all_courses.append(this_course)
+            print(site)
+            #writer.writerow([my_gp,my_dean,dept,num,code,crn,name,act,site])
+    print(all_courses)
+    #print(json.dumps(all_courses))
+    f3.write( json.dumps(all_courses,indent=2) )
+    f3.close()
+    expanded = list_latestarts(short_sem)
 
 
 
@@ -2101,7 +2182,7 @@ if __name__ == "__main__":
         8: ['Scrape schedule from ssb', scrape_schedule_multi ],
         9: ['Test ssb calls with python', scrape_schedule_py ],
         10: ['schedule to db', scrape_for_db ],
-        11: ['clean argos draft schedule file', argos_data],
+        11: ['clean argos draft schedule file', argos_data_from_cvc],
         12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
         13: ['Parse deanza schedule', dza_sched ],
     }