cleaning - courses, pipelines

2023-03-27 17:17:32 -07:00 · 2023-03-27 17:17:32 -07:00 · 55290f9fa1
parent 89165b6a09
commit 55290f9fa1
5 changed files with 74 additions and 71 deletions
--- a/courses.py
+++ b/courses.py
@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os
 import pandas as pd
 from dateutil import parser
 from datetime import datetime
-from util import print_table
+from util import print_table, int_or_zero, float_or_zero

 from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
 from pipelines import sems
@ -19,14 +19,6 @@ stem_course_id = '11015'   # TODO
 #########
 #########

-def int_or_zero(x):
-    if x == None: return 0
-    else: return int(x)
-
-def float_or_zero(x):
-    if x == None: return 0
-    else: return float(x)
-
 # Gott 1 Bootcamp - report on who completed it.
 def get_gott1_passers():
    course = '1561'
@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"):
    print(r.text)


-#########
-#########  BOOKSTORE
-#########
-#########
-
-def scrape_bookstore():
-    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
-    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )
-    
-    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   # NO TEXT
-    
-    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   #  3 REQUIRED at: 
-    # [""0""].courseSectionDTO[""0""].courseMaterialResultsList
-    #
-    # and also:
-    #
-    # [""0""].courseSectionDTO[""0""].sectionAdoptionDTO.materialAdoptions
-    
-# todo: where does the most recent schedule come from?
-
-# Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
-def list_latestarts():
-    #term = input("Name of current semester file? (ex: sp18)  ")
-    term = "sp23"   # sems[0]
-    
-    term_in = "cache/" + term + "_sched.json"                 
-    term_out = "cache/" + term + "_latestarts.txt"
-    print("Writing output to " + term_out)
-    infile = open(term_in, "r")
-    outfile = open(term_out, "w")
-    sched = json.loads(infile.read())
-    #print sched
-    by_date = {}
-    for C in sched:
-        parts = C['date'].split("-")
-        start = parts[0]
-        codes = C['code'].split(' ')
-        dept = codes[0]
-        if dept in ['JLE','JFT','CWE']:
-            continue
-        if re.search('TBA',start): continue
-        try:
-            startd = parser.parse(start)
-        except Exception as e:
-            print(e, "\nproblem parsing ", start)
-        #print startd
-        if not startd in by_date:
-            by_date[startd] = []
-        by_date[startd].append(C)
-    for X in sorted(by_date.keys()):
-        #print "Start: " + str(X)
-        if len(by_date[X]) < 200:
-            prettydate = X.strftime("%A, %B %d")
-            print(prettydate + ": " + str(len(by_date[X])) + " courses")
-            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
-            for Y in by_date[X]:
-                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
-                print(Y)
-                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
-                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
-

 # All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids.
 def users_in_semester():
--- a/depricated.py
+++ b/depricated.py
@ -6,6 +6,53 @@



+
+# todo: where does the most recent schedule come from?
+
+# Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
+def list_latestarts():  # Group the cached schedule by parsed start date, then print and write a per-date listing.
+    #term = input("Name of current semester file? (ex: sp18)  ")
+    term = "sp23"   # sems[0]
+    
+    term_in = "cache/" + term + "_sched.json"                 # schedule JSON read as input
+    term_out = "cache/" + term + "_latestarts.txt"  # text report written as output
+    print("Writing output to " + term_out)
+    infile = open(term_in, "r")  # NOTE(review): never closed in this function
+    outfile = open(term_out, "w")  # NOTE(review): never closed explicitly in this function
+    sched = json.loads(infile.read())
+    #print sched
+    by_date = {}  # parsed start date -> list of schedule entries with that start
+    for C in sched:
+        parts = C['date'].split("-")
+        start = parts[0]  # text before the first "-" is treated as the start date
+        codes = C['code'].split(' ')
+        dept = codes[0]  # first space-separated token of the course code
+        if dept in ['JLE','JFT','CWE']:  # entries from these departments are skipped
+            continue
+        if re.search('TBA',start): continue  # skip entries whose start text contains TBA
+        try:
+            startd = parser.parse(start)
+        except Exception as e:
+            print(e, "\nproblem parsing ", start)  # NOTE(review): no continue here, so startd below is stale (or unbound on the first entry)
+        #print startd
+        if not startd in by_date:
+            by_date[startd] = []
+        by_date[startd].append(C)
+    for X in sorted(by_date.keys()):  # report dates in chronological order
+        #print "Start: " + str(X)
+        if len(by_date[X]) < 200:  # dates with 200 or more entries are left out of the report
+            prettydate = X.strftime("%A, %B %d")  # weekday name, month name, day of month
+            print(prettydate + ": " + str(len(by_date[X])) + " courses")
+            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
+            for Y in by_date[X]:
+                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
+                print(Y)
+                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
+                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
+
+
+
+
 online_courses = {}
 def prep_online_courses_df():
    global online_courses
--- a/pipelines.py
+++ b/pipelines.py
@ -2074,7 +2074,7 @@ def expand_old_semesters():
        input('press return to continue.')

 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
-def list_latestarts(term):
+def list_latestarts(term="sp23"):
    
    show_summary = 1
    
--- a/tasks.py
+++ b/tasks.py
@ -30,6 +30,25 @@ from pipelines import header, url, fetch

 #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email

+#########
+#########  BOOKSTORE
+#########
+#########
+
+def scrape_bookstore():  # Stub: records bookstore endpoint URLs and loads the cached course list; makes no requests and returns nothing.
+    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
+    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )  # NOTE(review): file handle is never closed
+    
+    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   # NO TEXT
+    # NOTE(review): the URL literal below is identical to one_section
+    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   #  3 REQUIRED at: 
+    # [""0""].courseSectionDTO[""0""].courseMaterialResultsList
+    #
+    # and also:
+    #
+    # [""0""].courseSectionDTO[""0""].sectionAdoptionDTO.materialAdoptions
+    
+

 def survey_answer(q=0):

--- a/util.py
+++ b/util.py
@ -42,7 +42,13 @@ def clean_title(st):
    if len(st)>50: return st[:50]+'...'
    return st

+def int_or_zero(x):  # Return int(x), or 0 when x is None.
+    if x == None: return 0  # NOTE(review): "x is None" is the idiomatic comparison (PEP 8)
+    else: return int(x)  # any other value is passed to int() unchanged

+def float_or_zero(x):  # Return float(x), or 0 when x is None.
+    if x == None: return 0  # NOTE(review): returns the int 0, not 0.0; "x is None" is the idiomatic comparison
+    else: return float(x)  # any other value is passed to float() unchanged

 def match59(x):
    if x['links']['context']==7959: return True