oct updates: schedules and timeblocks in local db

Coding with Peter 2023-10-19 14:44:25 -07:00
parent 6dfbb913e0
commit 466f789355
4 changed files with 169 additions and 66 deletions


@@ -1884,7 +1884,18 @@ def bulk_unenroll():
        print(f"Failed to unenroll student with id {enrollment_id} from course {course_id}. Error: {response.text}")

+def fetch_announcements():
+    course_id = 18268
+    announcements_url = f"{url}/api/v1/announcements?context_codes[]=course_{course_id}"
+    announcements = fetch(announcements_url)
+    print(json.dumps(announcements, indent=2))
+    filename = f"cache/announcements{course_id}.json"
+    with open(filename, "w") as file:
+        json.dump(announcements, file)
+    print("Announcements saved to ", filename)

if __name__ == "__main__":
    options = { 1: ['Cross check schedule with ztc responses',make_ztc_list] ,
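For reference, a standalone sketch of the same announcements call made with requests directly, not part of the commit: the base URL and token below are placeholders, and the script's own fetch() helper presumably handles auth and pagination.

    import json
    import requests

    url = "https://example.instructure.com"                 # placeholder Canvas base URL
    headers = {"Authorization": "Bearer CANVAS_API_TOKEN"}  # placeholder token
    r = requests.get(f"{url}/api/v1/announcements",
                     params={"context_codes[]": "course_18268", "per_page": 100},
                     headers=headers)
    print(json.dumps(r.json(), indent=2))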
@ -1935,6 +1946,7 @@ if __name__ == "__main__":
#
45: ['Fetch rubric scores and comments', fetch_rubric_scores],
46: ['Fetch announcements in a course', fetch_announcements],
}
print ('')


@@ -40,7 +40,7 @@ requests_format = "id timestamp year month day userid courseid rootid course_acc
users_format = "id canvasid rootactid name tz created vis school position gender locale public bd cc state sortablename globalid".split(" ")
cc_format = "id canvasid userid address type position state created updated".split(" ")
term_format = "id canvasid rootid name start end sis".split(" ")
-course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid".split(" ")
+course_format = "id canvasid rootactid acctid termid name code type created start conclude visible sis state wikiid schedule".split(" ")
role_format = "id canvas_id root_account_id account_id name base_role_type workflow_state created_at updated_at deleted_at".split(" ")
course_score_format = "s_id c_id a_id course_id enrol_id current final muted_current muted_final".split(" ")
enrollment_dim_format = "id cid root course_section role type workflow created updated start end complete self sis course_id user_id last_activity".split(" ")
@@ -92,12 +92,12 @@ DB_CUR = 0
######### LOCAL DB
#########

-def db():
+def db(file=sqlite_file):
    global DB_CON, DB_CUR
    if DB_CON:
        return (DB_CON,DB_CUR)
    print('grabbing db connection')
-    DB_CON = sqlite3.connect(sqlite_file)
+    DB_CON = sqlite3.connect(file)
    DB_CUR = DB_CON.cursor()
    return (DB_CON, DB_CUR)
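A quick usage sketch of the new optional file parameter, not part of the commit (the snapshot path is the one used later in this diff). Because the connection is cached in the DB_CON global, the argument only takes effect on the first call of a run:

    # Open the October snapshot instead of the default sqlite_file.
    conn, cur = db('cache/canvas_data/data20231012.db')
    cur.execute("SELECT COUNT(*) FROM schedule")
    print(cur.fetchone()[0])
    # A later db('other.db') in the same process returns the cached
    # connection and does NOT switch databases.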
@@ -594,22 +594,7 @@ def is_requestfile_interesting(fname):
    global thefiles, thefiles_dat
    #begin_month = ['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07']
    #begin_month = ['2020-09','2020-10','2020-08']
-    begin_month = ['2021-02','2021-03']
-    #AE 600 (80040; 80045; 80047) 10945
-    #AE 602 (80048; 80049; 80050) 10746
-    #AE 636 (80332; 80381) 10783
-    #CSIS 571A (80428) 10956
-    #GUID 558A (80429) 10957
-    # The AEC sections of interest.
-    sections = '10945 10746 1783 10956 10957'.split(' ')
-    # Just once, to get the people
-    #[ course_enrollment(x) for x in sections ]
+    begin_month = ['2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07']
    first = {}
    lines = False
@@ -745,6 +730,8 @@ def requests_file(fname_list):
    samples = codecs.open('cache/request_samples.txt', 'a', 'utf-8')
    conn,cur = db()
+    RESUME = 610
    folderi = 0
    filei = 0
    last_time = time.process_time()
@@ -755,33 +742,30 @@ def requests_file(fname_list):
        #if folderi > 2: return
        print("\n%i\t%s \t" % (folderi, fname), end='', flush=True)
        folderi += 1
+        if folderi < RESUME:
+            continue
        filei = 0
-        lines = is_requestfile_interesting(fname)
-        if lines:
-            vals_cache = []
-            for L in lines:
-                thisline = requests_line(L,filei) #TODO select if timeblock exists
-                if not thisline:
-                    continue
-                if random.random() > 0.9999:
-                    #L = str(L)
-                    if type(L) == type(b'abc'): L = L.decode('utf-8')
-                    parts = L.split('\t')
-                    if len(parts)>17:
-                        samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
-                #q,v = dict_to_insert(thisline,'requests')
-                if not 'courseid' in thisline: continue
-                if not 'userid' in thisline: continue
-                # Limit this database to certain courses?
-                # if thisline['courseid'] not in mycourses: continue
+        try:
+            lines = is_requestfile_interesting(fname)
+            if lines:
+                vals_cache = []
+                for L in lines:
+                    thisline = requests_line(L,filei) #TODO select if timeblock exists
+                    if not thisline:
+                        continue
+                    if random.random() > 0.99999:
+                        #L = str(L)
+                        if type(L) == type(b'abc'): L = L.decode('utf-8')
+                        parts = L.split('\t')
+                        if len(parts)>17:
+                            samples.write( "\t".join( [parts[13] , parts[14], parts[15], parts[16], parts[18], parts[19]]))
+                    if not 'courseid' in thisline: continue
+                    if not 'userid' in thisline: continue
+                    v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
+                    vals_cache.append( [ str(x) for x in v ] )
                    try:
                        #cur.execute(q)
-                    v = ( thisline['userid'], thisline['courseid'], thisline['time_block'], 1 )
-                    vals_cache.append( [ str(x) for x in v ] )
                        if filei % 5000 == 0:
                            conn.executemany(q, vals_cache)
                            conn.commit()
@@ -791,19 +775,16 @@ def requests_file(fname_list):
                            print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
                            samples.flush()
                        filei += 1
                    except Exception as e:
                        print(thisline)
                        print(e)
                        print(q)
                        print(v)
-            # do the commit on the entire file...
-            conn.executemany(q, vals_cache)
-            conn.commit()
-            t = time.process_time()
-            delta = t - last_time
-            last_time = t
-            print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
+                # do the commit on the entire file...
+                conn.executemany(q, vals_cache)
+                conn.commit()
+                t = time.process_time()
+                delta = t - last_time
+                last_time = t
+                print("\nLoop %i - committed to db in %0.1fs. " % (filei,delta), end='', flush=True)
+        except Exception as e:
+            print(e)
+            print("Failed on: %s" % fname)
# Insert or update a request line.
def upsert_request(line, vals):
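For orientation, since q is bound outside the lines shown here: a per-timeblock tally statement compatible with the (userid, courseid, time_block, 1) tuples built above might look like the hypothetical sketch below. The table and tally column names are assumptions, and the upsert needs a unique index on (userid, courseid, time_block) to fire.

    # Hypothetical shape of q; not taken from the repo.
    q = """INSERT INTO requests (userid, courseid, time_block, tally)
           VALUES (?, ?, ?, ?)
           ON CONFLICT(userid, courseid, time_block)
           DO UPDATE SET tally = tally + excluded.tally"""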
@@ -838,17 +819,16 @@ def dict_to_insert(thisline,table): # a dict

# This and the following merge functions do direct inserts without further tallying.
# This now does tallying by timeblock.
def merge_requests():
-    req = []
-    i = 0
-    max = 2000
-    for f in os.listdir(local_data_folder):
-        if re.search(r'requests',f) and i < max:
-            req.append(f)
-            i += 1
-    #req = ['requests-00000-afc834d1.gz',]
-    print("Checking %i request log files." % len(req))
+    req = [x for x in os.listdir(local_data_folder) if 'requests' in x]
+    print(f"Checking {len(req)} request log files.")
    requests_file(req)
+    #i = 0
+    #max = 20000
+    #for f in os.listdir(local_data_folder):
+    #    if re.search(r'requests',f) and i < max:
+    #        req.append(f)
+    #        i += 1

def merge_comm_channel():
    setup_table('comm_channel')
@@ -1910,6 +1890,110 @@ def test_long_running():
        sleep(1)

+def courses_to_sched():
+    # Correlate rows in courses table with an id to rows in schedule table.
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    q = "SELECT canvasid, code, sis, schedule FROM courses ORDER BY sis DESC"
+    conn.row_factory = dict_factory
+    seasons = {'10':'wi','30':'sp','50':'su','70':'fa'}
+    cur.execute(q)
+    courses = cur.fetchall()
+    sem = ''
+    for c in courses:
+        try:
+            if re.search(r'^\d\d\d\d\d\d\-\d\d\d\d\d$', c[2]):
+                semparts = c[2].split('-')
+                yr = semparts[0][2:4]
+                if yr in ['16','17']: continue
+                print(c, end=' ')
+                season = seasons[ str(semparts[0][4:6]) ]
+                sem = f"{season}{yr}"
+                crn = semparts[1]
+                print(sem, end=' ')
+                q2 = f"SELECT * FROM schedule WHERE crn='{crn}' AND sem='{sem}'"
+                cur.execute(q2)
+                sched = cur.fetchall()
+                if sched:
+                    sched = sched[0]
+                    id = sched[0]
+                    q3 = f"UPDATE courses SET schedule='{id}' WHERE canvasid='{c[0]}'"
+                    cur.execute(q3)
+                    print(sched)
+                    #print(q3)
+                else:
+                    print()
+        except Exception as e:
+            print(e)
+    conn.commit()
+
+def sched_to_db():
+    d = 'DROP TABLE IF EXISTS `schedule`;'
+    table = '''CREATE TABLE `schedule` (
+        `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+        `crn` varchar(10) NOT NULL,
+        `code` varchar(30) NOT NULL,
+        `units` varchar(20) NOT NULL,
+        `teacher` tinytext NOT NULL,
+        `start` varchar(30) NOT NULL,
+        `end` varchar(30) NOT NULL,
+        `type` varchar(20) NOT NULL,
+        `loc` varchar(80) NOT NULL,
+        `site` varchar(50) NOT NULL,
+        `partofday` varchar(40) NOT NULL,
+        `cap` INTEGER,
+        `act` INTEGER,
+        `sem` varchar(10) NOT NULL
+    ) ;
+    '''
+    conn,cur = db('cache/canvas_data/data20231012.db')
+    print(table)
+    cur.execute(d)
+    cur.execute(table)
+    conn.commit()
+    vals_cache = []
+    last_time = time.process_time()
+    i = 0
+    output = codecs.open('cache/schedule.sql','w','utf-8')
+    for year in ['16','17','18','19','20','21','22','23']:
+        for sem in ['sp','su','fa']:
+            term = f"{sem}{year}"
+            print(term)
+            try:
+                sched = requests.get(f"http://gavilan.cc/schedule/{term}_sched_expanded.json").json()
+                show_summary = 1
+                query = "INSERT INTO schedule (crn, code, units, teacher, start, end, type, loc, site, partofday, cap, act, sem) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?);"
+                for c in sched:
+                    pod = ''
+                    if 'partofday' in c: pod = c['partofday']
+                    q = [c['crn'], c['code'], c['cred'], c['teacher'], c['start'], c['end'], c['type'], c['loc'], c['site'], pod, c['cap'], c['act'], term]
+                    vals_cache.append( q ) # [ str(x) for x in q ] )
+                    #print(f"{i}: {q}")
+                    i += 1
+                    if i % 500 == 0:
+                        conn.executemany(query, vals_cache)
+                        conn.commit()
+                        vals_cache = []
+                        t = time.process_time()
+                        delta = t - last_time
+                        last_time = t
+                        print(f"Loop {i} - committed to db in %0.3fs. " % delta, flush=True)
+            except Exception as e:
+                print(e)
+    conn.executemany(query, vals_cache)
+    conn.commit()

if __name__ == "__main__":
@ -1939,6 +2023,8 @@ if __name__ == "__main__":
22: ['all students course history', all_students_history],
23: ['test long running', test_long_running],
24: ['add conference sessions', add_sessions],
25: ['gavilan.cc extended schedule to sql insert format', sched_to_db],
26: ['correlate courses to schedule id', courses_to_sched],
#19: ['add evals for a whole semester', instructor_list_to_activate_evals],
#16: ['Upload new employees to flex app', employees_refresh_flex],
}


@ -2184,6 +2184,11 @@ def list_latestarts(term="fa23"):
outfile.close()
put_file('/home/public/schedule/', 'cache/', "%s_latestarts.txt" % term, 0)
return expanded
if __name__ == "__main__":
print ('')