oct updates: schedules and timeblocks in local db

Coding with Peter 2023-10-19 14:44:25 -07:00
parent 6dfbb913e0
commit 466f789355
4 changed files with 169 additions and 66 deletions


@@ -1884,7 +1884,18 @@ def bulk_unenroll():
        print(f"Failed to unenroll student with id {enrollment_id} from course {course_id}. Error: {response.text}")

+def fetch_announcements():
+    course_id = 18268
+    announcements_url = f"{url}/api/v1/announcements?context_codes[]=course_{course_id}"
+    announcements = fetch(announcements_url)
+    print(json.dumps(announcements, indent=2))
+    filename = f"cache/announcements{course_id}.json"
+    with open(filename, "w") as file:
+        json.dump(announcements, file)
+    print("Announcements saved to ", filename)

if __name__ == "__main__":
    options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
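For reference, a standalone sketch of the same announcements call made with requests directly, not part of the commit: the base URL and token below are placeholders, and the script's own fetch() helper presumably handles auth and pagination.

    import json
    import requests

    url = "https://example.instructure.com"                 # placeholder Canvas base URL
    headers = {"Authorization": "Bearer CANVAS_API_TOKEN"}  # placeholder token
    r = requests.get(f"{url}/api/v1/announcements",
                     params={"context_codes[]": "course_18268", "per_page": 100},
                     headers=headers)
    print(json.dumps(r.json(), indent=2))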
@ -1935,6 +1946,7 @@ if __name__ == "__main__":
#
45: ['Fetch rubric scores and comments', fetch_rubric_scores],
46: ['Fetch announcements in a course', fetch_announcements],
}
print ('')


@@ -40,7 +40,7 @@ requests_format = "id timestamp year month day userid courseid rootid course_acc
users_format = "id canvasid rootactid name tz created vis school position gender locale public bd cc state sortablename globalid".split(" ")
cc_format = "id canvasid userid address type position state created updated".split(" ")
term_format = "id canvasid rootid name start end sis".split(" ")
-course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid".split(" ")
+course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid schedule".split(" ")
role_format = "id canvas_id root_account_id account_id name base_role_type workflow_state created_at updated_at deleted_at".split(" ")
course_score_format = "s_id c_id a_id course_id enrol_id current final muted_current muted_final".split(" ")
enrollment_dim_format = "id cid root course_section role type workflow created updated start end complete self sis course_id user_id last_activity".split(" ")
@@ -92,12 +92,12 @@ DB_CUR = 0
######### LOCAL DB
#########

-def db():
+def db(file=sqlite_file):
    global DB_CON, DB_CUR
    if DB_CON:
        return (DB_CON,DB_CUR)
    print('grabbing db connection')
-    DB_CON = sqlite3.connect(sqlite_file)
+    DB_CON = sqlite3.connect(file)
    DB_CUR = DB_CON.cursor()
    return (DB_CON, DB_CUR)
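A quick usage sketch of the new optional file parameter, not part of the commit (the snapshot path is the one used later in this diff). Because the connection is cached in the DB_CON global, the argument only takes effect on the first call of a run:

    # Open the October snapshot instead of the default sqlite_file.
    conn, cur = db('cache/canvas_data/data20231012.db')
    cur.execute("SELECT COUNT(*) FROM schedule")
    print(cur.fetchone()[0])
    # A later db('other.db') in the same process returns the cached
    # connection and does NOT switch databases.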
@@ -594,22 +594,7 @@ def is_requestfile_interesting(fname):
    global thefiles, thefiles_dat
    #begin_month = ['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07']
    #begin_month = ['2020-09','2020-10','2020-08']
-    begin_month = ['2021-02','2021-03']
-    #AE 600 (80040; 80045; 80047) 10945
-    #AE 602 (80048; 80049; 80050) 10746
-    #AE 636 (80332; 80381) 10783
-    #CSIS 571A (80428) 10956
-    #GUID 558A (80429) 10957
-    # The AEC sections of interest.
-    sections = '10945 10746 1783 10956 10957'.split(' ')
-    # Just once, to get the people
-    #[ course_enrollment(x) for x in sections ]
+    begin_month = ['2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07']
    first = {}
    lines = False
@@ -745,6 +730,8 @@ def requests_file(fname_list):
    samples = codecs.open('cache/request_samples.txt', 'a', 'utf-8')
    conn,cur = db()
+    RESUME = 610
    folderi = 0
    filei = 0
    last_time = time.process_time()
@@ -755,33 +742,30 @@ def requests_file(fname_list):
        #if folderi > 2: return
        print("\n%i\t%s \t" % (folderi, fname), end='', flush=True)
        folderi += 1
+        if folderi < RESUME:
+            continue
        filei = 0
-        lines = is_requestfile_interesting(fname)
-        if lines:
-            vals_cache = []
-            for L in lines:
-                thisline = requests_line(L,filei) #TODO select if timeblock exists
-                if not thisline:
-                    continue
-                if random.random() > 0.9999:
-                    #L = str(L)
-                    if type(L) == type(b'abc'): L = L.decode('utf-8')
-                    parts = L.split('\t')
-                    if len(parts)>17:
-                        samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
-                #q,v = dict_to_insert(thisline,'requests')
-                if not 'courseid' in thisline: continue
-                if not 'userid' in thisline: continue
-                # Limit this database to certain courses?
-                # if thisline['courseid'] not in mycourses: continue
+        try:
+            lines = is_requestfile_interesting(fname)
+            if lines:
+                vals_cache = []
+                for L in lines:
+                    thisline = requests_line(L,filei) #TODO select if timeblock exists
+                    if not thisline:
+                        continue
+                    if random.random() > 0.99999:
+                        #L = str(L)
+                        if type(L) == type(b'abc'): L = L.decode('utf-8')
+                        parts = L.split('\t')
+                        if len(parts)>17:
+                            samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
+                    if not 'courseid' in thisline: continue
+                    if not 'userid' in thisline: continue
+                    v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
+                    vals_cache.append( [ str(x) for x in v ] )
                    try:
                        #cur.execute(q)
-                    v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
-                    vals_cache.append( [ str(x) for x in v ] )
                        if filei % 5000 == 0:
                            conn.executemany(q, vals_cache)
                            conn.commit()
@@ -791,19 +775,16 @@ def requests_file(fname_list):
                            print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
                            samples.flush()
                        filei += 1
                    except Exception as e:
                        print(thisline)
                        print(e)
                        print(q)
                        print(v)
-            # do the commit on the entire file...
-            conn.executemany(q, vals_cache)
-            conn.commit()
-            t = time.process_time()
-            delta = t - last_time
-            last_time = t
-            print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
+                # do the commit on the entire file...
+                conn.executemany(q, vals_cache)
+                conn.commit()
+                t = time.process_time()
+                delta = t - last_time
+                last_time = t
+                print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
+        except Exception as e:
+            print(e)
+            print("Failed on: %s" % fname)
# Insert or update a request line.
def upsert_request(line, vals):
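For orientation, since q is bound outside the lines shown here: a per-timeblock tally statement compatible with the (userid, courseid, time_block, 1) tuples built above might look like the hypothetical sketch below. The table and tally column names are assumptions, and the upsert needs a unique index on (userid, courseid, time_block) to fire.

    # Hypothetical shape of q; not taken from the repo.
    q = """INSERT INTO requests (userid, courseid, time_block, tally)
           VALUES (?, ?, ?, ?)
           ON CONFLICT(userid, courseid, time_block)
           DO UPDATE SET tally = tally + excluded.tally"""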
@@ -838,17 +819,16 @@ def dict_to_insert(thisline,table): # a dict

# This and the following merge functions do direct inserts without further tallying.
# This now does tallying by timeblock.
def merge_requests():
-    req = []
-    i = 0
-    max = 2000
-    for f in os.listdir(local_data_folder):
-        if re.search(r'requests',f) and i < max:
-            req.append(f)
-            i += 1
-    #req = ['requests-00000-afc834d1.gz',]
-    print("Checking %i request log files." % len(req))
+    req = [x for x in os.listdir(local_data_folder) if 'requests' in x]
+    print(f"Checking {len(req)} request log files.")
    requests_file(req)
+    #i = 0
+    #max = 20000
+    #for f in os.listdir(local_data_folder):
+    #    if re.search(r'requests',f) and i < max:
+    #        req.append(f)
+    #        i += 1

def merge_comm_channel():
    setup_table('comm_channel')
@@ -1910,6 +1890,110 @@ def test_long_running():
        sleep(1)

+def courses_to_sched():
+    # Correlate rows in courses table with an id to rows in schedule table.
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    q = "SELECT canvasid, code, sis, schedule FROM courses ORDER BY sis DESC"
+    conn.row_factory = dict_factory
+    seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
+    cur.execute(q)
+    courses = cur.fetchall()
+    sem = ''
+    for c in courses:
+        try:
+            if re.search(r'^\d\d\d\d\d\d\-\d\d\d\d\d$', c[2]):
+                semparts = c[2].split('-')
+                yr = semparts[0][2:4]
+                if yr in ['16','17']: continue
+                print(c, end=' ')
+                season = seasons[ str(semparts[0][4:6]) ]
+                sem = f"{season}{yr}"
+                crn = semparts[1]
+                print(sem, end=' ')
+                q2 = f"SELECT * FROM schedule WHERE crn='{crn}' AND sem='{sem}'"
+                cur.execute(q2)
+                sched = cur.fetchall()
+                if sched:
+                    sched = sched[0]
+                    id = sched[0]
+                    q3 = f"UPDATE courses SET schedule='{id}' WHERE canvasid='{c[0]}'"
+                    cur.execute(q3)
+                    print(sched)
+                    #print(q3)
+                else:
+                    print()
+        except Exception as e:
+            print(e)
+    conn.commit()
+
+def sched_to_db():
+    d = 'DROP TABLE IF EXISTS `schedule`;'
+    table = '''CREATE TABLE `schedule` (
+        `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+        `crn` varchar(10) NOT NULL,
+        `code` varchar(30) NOT NULL,
+        `units` varchar(20) NOT NULL,
+        `teacher` tinytext NOT NULL,
+        `start` varchar(30) NOT NULL,
+        `end` varchar(30) NOT NULL,
+        `type` varchar(20) NOT NULL,
+        `loc` varchar(80) NOT NULL,
+        `site` varchar(50) NOT NULL,
+        `partofday` varchar(40) NOT NULL,
+        `cap` INTEGER,
+        `act` INTEGER,
+        `sem` varchar(10) NOT NULL
+    ) ;
+    '''
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    print(table)
+    cur.execute(d)
+    cur.execute(table)
+    conn.commit()
+    vals_cache = []
+    last_time = time.process_time()
+    i = 0
+    output = codecs.open('cache/schedule.sql','w','utf-8')
+    for year in ['16','17','18','19','20','21','22','23']:
+        for sem in ['sp','su','fa']:
+            term = f"{sem}{year}"
+            print(term)
+            try:
+                sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
+                show_summary = 1
+                query = "INSERT INTO schedule (crn, code, units, teacher, start, end, type, loc, site, partofday, cap, act, sem) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?);"
+                for c in sched:
+                    pod = ''
+                    if 'partofday' in c: pod = c['partofday']
+                    q = [c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, c['cap'], c['act'], term]
+                    vals_cache.append( q ) # [ str(x) for x in q ] )
+                    #print(f"{i}: {q}")
+                    i += 1
+                    if i % 500 == 0:
+                        conn.executemany(query, vals_cache)
+                        conn.commit()
+                        vals_cache = []
+                        t = time.process_time()
+                        delta = t - last_time
+                        last_time = t
+                        print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True)
+            except Exception as e:
+                print(e)
+    conn.executemany(query, vals_cache)
+    conn.commit()

if __name__ == "__main__":
@ -1939,6 +2023,8 @@ if __name__ == "__main__":
22: ['all students course history', all_students_history],
23: ['test long running', test_long_running],
24: ['add conference sessions', add_sessions],
25: ['gavilan.cc extended schedule to sql insert format', sched_to_db],
26: ['correlate courses to schedule id', courses_to_sched],
#19: ['add evals for a whole semester', instructor_list_to_activate_evals],
#16: ['Upload new employees to flex app', employees_refresh_flex],
}


@ -2184,6 +2184,11 @@ def list_latestarts(term="fa23"):
outfile.close()
put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
return expanded
if __name__ == "__main__":
print ('')