cleaning - courses, pipelines

2023-03-27 17:17:32 -07:00 · 2023-03-27 17:17:32 -07:00 · 55290f9fa1
parent 89165b6a09
commit 55290f9fa1
5 changed files with 74 additions and 71 deletions
--- a/courses.py
+++ b/courses.py
@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os
 import pandas as pd
 from dateutil import parser
 from datetime import datetime
-from util import print_table
+from util import print_table, int_or_zero, float_or_zero
 from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
 from pipelines import sems
@ -19,14 +19,6 @@ stem_course_id = '11015'   # TODO
 #########
 #########
 def int_or_zero(x):
    """Return ``int(x)``, or 0 when *x* is None.

    Any value other than None is passed straight to ``int``, so the usual
    ``ValueError`` / ``TypeError`` from ``int`` still propagate.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on x.
    if x is None:
        return 0
    return int(x)
 def float_or_zero(x):
    """Return ``float(x)``, or 0 when *x* is None.

    Any value other than None is passed straight to ``float``, so the usual
    ``ValueError`` / ``TypeError`` from ``float`` still propagate.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on x.
    if x is None:
        return 0
    return float(x)
 # Gott 1 Bootcamp - report on who completed it.
 def get_gott1_passers():
    course = '1561'
@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"):
    print(r.text)
 #########
 #########  BOOKSTORE
 #########
 #########
 def scrape_bookstore():
    """Scratch notes for reading textbook data from the bkstr.com service.

    Only loads the cached course list from ``cache/bookstore_courses.json``;
    no request is made and nothing is returned yet.
    """
    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
    # Context manager so the cache file handle is closed right after reading.
    with open('cache/bookstore_courses.json','r') as cache_file:
        bcu_cached = json.loads( cache_file.read() )
    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   # NO TEXT
    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   #  3 REQUIRED at: 
    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
    #
    # and also:
    #
    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
 # todo: where does the most recent schedule come from?
 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
 def list_latestarts():
    """Report the courses of the current term grouped by start date.

    Reads ``cache/<term>_sched.json`` (a JSON list of course dicts) and
    writes ``cache/<term>_latestarts.txt``; the same report is echoed to
    stdout. Each course dict must provide the keys 'date', 'code', 'crn',
    'teacher' and 'type'. The term is currently hard-coded to "sp23".

    Courses in departments JLE, JFT and CWE, courses whose start contains
    'TBA', and courses whose start date cannot be parsed are left out.
    """
    #term = input("Name of current semester file? (ex: sp18)  ")
    term = "sp23"   # sems[0]
    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)

    # Context manager so the schedule file is closed as soon as it is read.
    with open(term_in, "r") as infile:
        sched = json.loads(infile.read())

    by_date = {}
    for C in sched:
        # Only the text before the first "-" in 'date' is used as the start.
        start = C['date'].split("-")[0]
        dept = C['code'].split(' ')[0]
        if dept in ['JLE','JFT','CWE']:
            continue
        if re.search('TBA',start): continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            # Skip this course: without a parsed date, startd would be
            # unbound or left over from the previous course.
            continue
        by_date.setdefault(startd, []).append(C)

    with open(term_out, "w") as outfile:
        for X in sorted(by_date):
            courses = by_date[X]
            # NOTE(review): dates with 200 or more courses are not reported,
            # presumably to drop the regular term start - confirm.
            if len(courses) >= 200:
                continue
            heading = X.strftime("%A, %B %d") + ": " + str(len(courses)) + " courses"
            print(heading)
            outfile.write(heading + "\n")
            for Y in courses:
                print(Y)
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
 # All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids.
 def users_in_semester():
--- a/depricated.py
+++ b/depricated.py
@ -6,6 +6,53 @@
 # todo: where does the most recent schedule come from?
 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
 def list_latestarts():
    """Report the courses of the current term grouped by start date.

    Reads ``cache/<term>_sched.json`` (a JSON list of course dicts) and
    writes ``cache/<term>_latestarts.txt``; the same report is echoed to
    stdout. Each course dict must provide the keys 'date', 'code', 'crn',
    'teacher' and 'type'. The term is currently hard-coded to "sp23".

    Courses in departments JLE, JFT and CWE, courses whose start contains
    'TBA', and courses whose start date cannot be parsed are left out.
    """
    #term = input("Name of current semester file? (ex: sp18)  ")
    term = "sp23"   # sems[0]
    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)

    # Context manager so the schedule file is closed as soon as it is read.
    with open(term_in, "r") as infile:
        sched = json.loads(infile.read())

    by_date = {}
    for C in sched:
        # Only the text before the first "-" in 'date' is used as the start.
        start = C['date'].split("-")[0]
        dept = C['code'].split(' ')[0]
        if dept in ['JLE','JFT','CWE']:
            continue
        if re.search('TBA',start): continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            # Skip this course: without a parsed date, startd would be
            # unbound or left over from the previous course.
            continue
        by_date.setdefault(startd, []).append(C)

    with open(term_out, "w") as outfile:
        for X in sorted(by_date):
            courses = by_date[X]
            # NOTE(review): dates with 200 or more courses are not reported,
            # presumably to drop the regular term start - confirm.
            if len(courses) >= 200:
                continue
            heading = X.strftime("%A, %B %d") + ": " + str(len(courses)) + " courses"
            print(heading)
            outfile.write(heading + "\n")
            for Y in courses:
                print(Y)
                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
 online_courses = {}
 def prep_online_courses_df():
    global online_courses
--- a/pipelines.py
+++ b/pipelines.py
@ -2074,7 +2074,7 @@ def expand_old_semesters():
        input('press return to continue.')
 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt    
-def list_latestarts(term):
+def list_latestarts(term="sp23"):
    show_summary = 1
--- a/tasks.py
+++ b/tasks.py
@ -30,6 +30,25 @@ from pipelines import header, url, fetch
 #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email
 #########
 #########  BOOKSTORE
 #########
 #########
 def scrape_bookstore():
    """Scratch notes for reading textbook data from the bkstr.com service.

    Only loads the cached course list from ``cache/bookstore_courses.json``;
    no request is made and nothing is returned yet.
    """
    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
    # Context manager so the cache file handle is closed right after reading.
    with open('cache/bookstore_courses.json','r') as cache_file:
        bcu_cached = json.loads( cache_file.read() )
    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   # NO TEXT
    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse"   #  3 REQUIRED at: 
    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
    #
    # and also:
    #
    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
 def survey_answer(q=0):
--- a/util.py
+++ b/util.py
@ -42,7 +42,13 @@ def clean_title(st):
    if len(st)>50: return st[:50]+'...'
    return st
 def int_or_zero(x):
    """Return ``int(x)``, or 0 when *x* is None.

    Any value other than None is passed straight to ``int``, so the usual
    ``ValueError`` / ``TypeError`` from ``int`` still propagate.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on x.
    if x is None:
        return 0
    return int(x)
 def float_or_zero(x):
    """Return ``float(x)``, or 0 when *x* is None.

    Any value other than None is passed straight to ``float``, so the usual
    ``ValueError`` / ``TypeError`` from ``float`` still propagate.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on x.
    if x is None:
        return 0
    return float(x)
 def match59(x):
    if x['links']['context']==7959: return True