diff --git a/courses.py b/courses.py
index 12a1f01..ae24856 100644
--- a/courses.py
+++ b/courses.py
@@ -1884,7 +1884,19 @@ def bulk_unenroll():
             print(f"Failed to unenroll student with id {enrollment_id} from course {course_id}. Error: {response.text}")
 
+def fetch_announcements():
+    # Fetch the announcement feed for one hard-coded course and cache it to disk.
+    course_id = 18268
+    announcements_url = f"{url}/api/v1/announcements?context_codes[]=course_{course_id}"
+    announcements = fetch(announcements_url)
+
+    print(json.dumps(announcements, indent=2))
+    filename = f"cache/announcements{course_id}.json"
+    with open(filename, "w") as file:
+        json.dump(announcements, file)
+
+    print("Announcements saved to ", filename)
 
 if __name__ == "__main__":
     options = {
         1: ['Cross check schedule with ztc responses',make_ztc_list] ,
@@ -1935,6 +1947,7 @@ if __name__ == "__main__":
 
         # 45: ['Fetch rubric scores and comments', fetch_rubric_scores],
+        46: ['Fetch announcements in a course', fetch_announcements],
     }
 
     print ('')
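Note on fetch_announcements() above: Canvas paginates its list endpoints, so a single GET against /api/v1/announcements returns only the first page (10 items by default). The patch's fetch() helper isn't shown here; if it doesn't already follow the Link response headers, a sketch like the one below would collect every page. fetch_all, base, and token are hypothetical names, not part of the patch:

import requests

def fetch_all(page_url, token, params=None):
    # Follow rel="next" Link headers until Canvas runs out of pages.
    results = []
    while page_url:
        resp = requests.get(page_url, headers={'Authorization': f'Bearer {token}'}, params=params)
        resp.raise_for_status()
        results.extend(resp.json())
        page_url = resp.links.get('next', {}).get('url')   # None on the last page
        params = None   # the next-page URL already carries the query string
    return results

# e.g.: fetch_all(f"{base}/api/v1/announcements", token,
#                 {'context_codes[]': 'course_18268', 'per_page': 100})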
diff --git a/fa19_sched.json b/fa19_sched.json
deleted file mode 100644
index e69de29..0000000
diff --git a/localcache.py b/localcache.py
index 37c5b66..b2c26f6 100644
--- a/localcache.py
+++ b/localcache.py
@@ -40,7 +40,7 @@ requests_format = "id timestamp year month day userid courseid rootid course_acc
 users_format = "id canvasid rootactid name tz created vis school position gender locale public bd cc state sortablename globalid".split(" ")
 cc_format = "id canvasid userid address type position state created updated".split(" ")
 term_format = "id canvasid rootid name start end sis".split(" ")
-course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid".split(" ")
+course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid schedule".split(" ")
 role_format = "id canvas_id root_account_id account_id name base_role_type workflow_state created_at updated_at deleted_at".split(" ")
 course_score_format = "s_id c_id a_id course_id enrol_id current final muted_current muted_final".split(" ")
 enrollment_dim_format = "id cid root course_section role type workflow created updated start end complete self sis course_id user_id last_activity".split(" ")
@@ -92,12 +92,12 @@ DB_CUR = 0
 
 ######### LOCAL DB #########
 
-def db():
+def db(file=sqlite_file):
     global DB_CON, DB_CUR
     if DB_CON:
         return (DB_CON,DB_CUR)
     print('grabbing db connection')
-    DB_CON = sqlite3.connect(sqlite_file)
+    DB_CON = sqlite3.connect(file)
     DB_CUR = DB_CON.cursor()
     return (DB_CON, DB_CUR)
 
@@ -594,22 +594,7 @@ def is_requestfile_interesting(fname):
     global thefiles, thefiles_dat
     #begin_month = ['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07']
     #begin_month = ['2020-09','2020-10','2020-08']
-    begin_month = ['2021-02','2021-03']
-
-    #AE 600 (80040; 80045; 80047)  10945
-    #AE 602 (80048; 80049; 80050)  10746
-    #AE 636 (80332; 80381)  10783
-    #CSIS 571A (80428)  10956
-    #GUID 558A (80429)  10957
-
-    # The AEC sections of interest.
-    sections = '10945 10746 1783 10956 10957'.split(' ')
-    # Just once, to get the people
-    #[ course_enrollment(x) for x in sections ]
-
-
-
-
+    begin_month = ['2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07']
 
     first = {}
     lines = False
@@ -745,6 +730,8 @@ def requests_file(fname_list):
     samples = codecs.open('cache/request_samples.txt', 'a', 'utf-8')
     conn,cur = db()
 
+    RESUME = 610   # skip folders that an earlier, interrupted run already processed
+
     folderi = 0
     filei = 0
     last_time = time.process_time()
@@ -755,33 +742,30 @@ def requests_file(fname_list):
         #if folderi > 2: return
         print("\n%i\t%s \t" % (folderi, fname), end='', flush=True)
         folderi += 1
+        if folderi < RESUME:
+            continue
         filei = 0
-        lines = is_requestfile_interesting(fname)
-        if lines:
-            vals_cache = []
-            for L in lines:
-                thisline = requests_line(L,filei) #TODO select if timeblock exists
-                if not thisline:
-                    continue
-                if random.random() > 0.9999:
-                    #L = str(L)
-                    if type(L) == type(b'abc'): L = L.decode('utf-8')
-                    parts = L.split('\t')
-                    if len(parts)>17:
-                        samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
-
-                #q,v = dict_to_insert(thisline,'requests')
-                if not 'courseid' in thisline: continue
-                if not 'userid' in thisline: continue
-
-                # Limit this database to certain courses?
-                # if thisline['courseid'] not in mycourses: continue
+        try:
+            lines = is_requestfile_interesting(fname)
+            if lines:
+                vals_cache = []
+                for L in lines:
+                    thisline = requests_line(L,filei) #TODO select if timeblock exists
+                    if not thisline:
+                        continue
+                    if random.random() > 0.99999:
+                        #L = str(L)
+                        if type(L) == type(b'abc'): L = L.decode('utf-8')
+                        parts = L.split('\t')
+                        if len(parts)>17:
+                            samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
+
+                    if not 'courseid' in thisline: continue
+                    if not 'userid' in thisline: continue
 
-                v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
-                vals_cache.append( [ str(x) for x in v ] )
-                try:
-                    #cur.execute(q)
+                    v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
+                    vals_cache.append( [ str(x) for x in v ] )
                     if filei % 5000 == 0:
                         conn.executemany(q, vals_cache)
                         conn.commit()
                         delta = t - last_time
                         print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
                         samples.flush()
                     filei += 1
-                except Exception as e:
-                    print(thisline)
-                    print(e)
-                    print(q)
-                    print(v)
-        # do the commit on the entire file...
-        conn.executemany(q, vals_cache)
-        conn.commit()
-        t = time.process_time()
-        delta = t - last_time
-        last_time = t
-        print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
-
+            # do the commit on the entire file...
+            conn.executemany(q, vals_cache)
+            conn.commit()
+            t = time.process_time()
+            delta = t - last_time
+            last_time = t
+            print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
+        except Exception as e:
+            print(e)
+            print("Failed on: %s" % fname)
 
 # Insert or update a request line.
 def upsert_request(line, vals):
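The refactored requests_file() above leans on one idiom worth isolating: accumulate rows in a list, hand them to sqlite with executemany() every N rows, then flush the remainder once after the loop. A minimal, self-contained sketch of that pattern (the table and column names here are illustrative, not from the patch):

import sqlite3

def batched_insert(rows, batch_size=5000):
    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE requests (userid TEXT, courseid TEXT, time_block TEXT, hits INTEGER)')
    q = 'INSERT INTO requests VALUES (?,?,?,?)'
    cache = []
    for i, row in enumerate(rows, 1):
        cache.append(row)
        if i % batch_size == 0:      # periodic flush keeps memory bounded
            conn.executemany(q, cache)
            conn.commit()
            cache = []
    conn.executemany(q, cache)       # flush whatever is left over
    conn.commit()
    return conn

conn = batched_insert((str(u), '18268', '2023-08-01T10', 1) for u in range(12000))
print(conn.execute('SELECT COUNT(*) FROM requests').fetchone())   # (12000,)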
@@ -838,17 +819,16 @@ def dict_to_insert(thisline,table): # a dict
 # This and the following merge functions do direct inserts without further tallying.
 # This now does tallying by timeblock.
 def merge_requests():
-    req = []
-    i = 0
-    max = 2000
-
-    for f in os.listdir(local_data_folder):
-        if re.search(r'requests',f) and i < max:
-            req.append(f)
-            i += 1
-    #req = ['requests-00000-afc834d1.gz',]
-    print("Checking %i request log files." % len(req))
+    req = [x for x in os.listdir(local_data_folder) if 'requests' in x]
+    print(f"Checking {len(req)} request log files.")
     requests_file(req)
+    #i = 0
+    #max = 20000
+
+    #for f in os.listdir(local_data_folder):
+    #    if re.search(r'requests',f) and i < max:
+    #        req.append(f)
+    #        i += 1
 
 def merge_comm_channel():
     setup_table('comm_channel')
@@ -1910,6 +1890,113 @@ def test_long_running():
         sleep(1)
 
 
+def courses_to_sched():
+    # Correlate rows in the courses table with row ids in the schedule table.
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    q = "SELECT canvasid, code, sis, schedule FROM courses ORDER BY sis DESC"
+    # rows come back as plain tuples, so columns are indexed by position below
+
+    seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
+
+    cur.execute(q)
+    courses = cur.fetchall()
+    sem = ''
+    for c in courses:
+        try:
+            # sis ids look like <yyyy><season-code>-<crn>: six digits, a dash, five digits
+            if re.search(r'^\d{6}-\d{5}$', c[2]):
+                semparts = c[2].split('-')
+                yr = semparts[0][2:4]
+                if yr in ['16','17']: continue   # skip the oldest terms
+                print(c, end=' ')
+                season = seasons[ semparts[0][4:6] ]
+                sem = f"{season}{yr}"
+                crn = semparts[1]
+                print(sem, end=' ')
+                # parameterized queries sidestep quoting and injection problems
+                q2 = "SELECT * FROM schedule WHERE crn=? AND sem=?"
+                cur.execute(q2, (crn, sem))
+                sched = cur.fetchall()
+                if sched:
+                    sched = sched[0]
+                    sched_id = sched[0]
+                    q3 = "UPDATE courses SET schedule=? WHERE canvasid=?"
+                    cur.execute(q3, (sched_id, c[0]))
+
+                    print(sched)
+                    #print(q3)
+                else:
+                    print()
+        except Exception as e:
+            print(e)
+    conn.commit()
+
+
+
+def sched_to_db():
+    d = 'DROP TABLE IF EXISTS `schedule`;'
+    table = '''CREATE TABLE `schedule` (
+  `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+  `crn` varchar(10) NOT NULL,
+  `code` varchar(30) NOT NULL,
+  `units` varchar(20) NOT NULL,
+  `teacher` tinytext NOT NULL,
+  `start` varchar(30) NOT NULL,
+  `end` varchar(30) NOT NULL,
+  `type` varchar(20) NOT NULL,
+  `loc` varchar(80) NOT NULL,
+  `site` varchar(50) NOT NULL,
+  `partofday` varchar(40) NOT NULL,
+  `cap` INTEGER,
+  `act` INTEGER,
+  `sem` varchar(10) NOT NULL
+) ;
+'''
+
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    print(table)
+    cur.execute(d)
+    cur.execute(table)
+    conn.commit()
+
+    vals_cache = []
+    last_time = time.process_time()
+    i = 0
+    query = "INSERT INTO schedule (crn, code, units, teacher, start, end, type, loc, site, partofday, cap, act, sem) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?);"
+
+    output = codecs.open('cache/schedule.sql','w','utf-8')   # not written to in this function
+    for year in ['16','17','18','19','20','21','22','23']:
+        for sem in ['sp','su','fa']:
+            term = f"{sem}{year}"
+            print(term)
+            try:
+                sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
+                show_summary = 1
+
+                for c in sched:
+                    pod = ''
+                    if 'partofday' in c: pod = c['partofday']
+                    q = [c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, c['cap'], c['act'], term]
+                    vals_cache.append( q )   # [ str(x) for x in q ] )
+                    #print(f"{i}: {q}")
+                    i += 1
+                    if i % 500 == 0:
+                        conn.executemany(query, vals_cache)
+                        conn.commit()
+                        vals_cache = []
+                        t = time.process_time()
+                        delta = t - last_time
+                        last_time = t
+                        print(f"Loop {i} - committed to db in {delta:0.3f}s.", flush=True)
 
 
 if __name__ == "__main__":
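The sis parsing that courses_to_sched() does inline is easier to check as a standalone worked example. The sample value below is hypothetical, but it matches the ^\d{6}-\d{5}$ shape the function accepts:

import re

seasons = {'10': 'wi', '30': 'sp', '50': 'su', '70': 'fa'}   # same map as in the patch

def sis_to_term(sis):
    # '202370-80421' -> year 2023, season code 70 (fall), crn 80421
    if not re.search(r'^\d{6}-\d{5}$', sis):
        return None
    yyyyss, crn = sis.split('-')
    sem = seasons[yyyyss[4:6]] + yyyyss[2:4]
    return sem, crn

print(sis_to_term('202370-80421'))   # ('fa23', '80421')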
" % delta, flush=True) + + except Exception as e: + print(e) + conn.executemany(query, vals_cache) + conn.commit() + + if __name__ == "__main__": @@ -1939,6 +2023,8 @@ if __name__ == "__main__": 22: ['all students course history', all_students_history], 23: ['test long running', test_long_running], 24: ['add conference sessions', add_sessions], + 25: ['gavilan.cc extended schedule to sql insert format', sched_to_db], + 26: ['correlate courses to schedule id', courses_to_sched], #19: ['add evals for a whole semester', instructor_list_to_activate_evals], #16: ['Upload new employees to flex app', employees_refresh_flex], } diff --git a/pipelines.py b/pipelines.py index e7dce93..7d391d7 100644 --- a/pipelines.py +++ b/pipelines.py @@ -2184,6 +2184,11 @@ def list_latestarts(term="fa23"): outfile.close() put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0) return expanded + + + + + if __name__ == "__main__": print ('')