From 55290f9fa16267c38f7b7f194c41551689a435ca Mon Sep 17 00:00:00 2001 From: phowell Date: Mon, 27 Mar 2023 17:17:32 -0700 Subject: [PATCH] cleaning - courses, pipelines --- courses.py | 71 +-------------------------------------------------- depricated.py | 47 ++++++++++++++++++++++++++++++++++ pipelines.py | 2 +- tasks.py | 19 ++++++++++++++ util.py | 6 +++++ 5 files changed, 74 insertions(+), 71 deletions(-) diff --git a/courses.py b/courses.py index 71e6742..cf8be67 100644 --- a/courses.py +++ b/courses.py @@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os import pandas as pd from dateutil import parser from datetime import datetime -from util import print_table +from util import print_table, int_or_zero, float_or_zero from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem from pipelines import sems @@ -19,14 +19,6 @@ stem_course_id = '11015' # TODO ######### ######### -def int_or_zero(x): - if x == None: return 0 - else: return int(x) - -def float_or_zero(x): - if x == None: return 0 - else: return float(x) - # Gott 1 Bootcamp - report on who completed it. def get_gott1_passers(): course = '1561' @@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"): print(r.text) -######### -######### BOOKSTORE -######### -######### - -def scrape_bookstore(): - big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761" - bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() ) - - one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT - - another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at: - # [""0""].courseSectionDTO[""0""].courseMaterialResultsList - # - # and also: - # - # [""0""].courseSectionDTO[""0""].sectionAdoptionDTO.materialAdoptions - -# todo: where does the most recent schedule come from? - -# Input: xxxx_sched.json. Output: xxxx_latestarts.txt -def list_latestarts(): - #term = input("Name of current semester file? (ex: sp18) ") - term = "sp23" # sems[0] - - term_in = "cache/" + term + "_sched.json" - term_out = "cache/" + term + "_latestarts.txt" - print("Writing output to " + term_out) - infile = open(term_in, "r") - outfile = open(term_out, "w") - sched = json.loads(infile.read()) - #print sched - by_date = {} - for C in sched: - parts = C['date'].split("-") - start = parts[0] - codes = C['code'].split(' ') - dept = codes[0] - if dept in ['JLE','JFT','CWE']: - continue - if re.search('TBA',start): continue - try: - startd = parser.parse(start) - except Exception as e: - print(e, "\nproblem parsing ", start) - #print startd - if not startd in by_date: - by_date[startd] = [] - by_date[startd].append(C) - for X in sorted(by_date.keys()): - #print "Start: " + str(X) - if len(by_date[X]) < 200: - prettydate = X.strftime("%A, %B %d") - print(prettydate + ": " + str(len(by_date[X])) + " courses") - outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n") - for Y in by_date[X]: - #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] - print(Y) - #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n") - outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n") - # All students enrolled in a class in the given semester. Simpler verson of below. Return SET of course_ids. def users_in_semester(): diff --git a/depricated.py b/depricated.py index 009460b..73e843d 100644 --- a/depricated.py +++ b/depricated.py @@ -6,6 +6,53 @@ + +# todo: where does the most recent schedule come from? + +# Input: xxxx_sched.json. Output: xxxx_latestarts.txt +def list_latestarts(): + #term = input("Name of current semester file? (ex: sp18) ") + term = "sp23" # sems[0] + + term_in = "cache/" + term + "_sched.json" + term_out = "cache/" + term + "_latestarts.txt" + print("Writing output to " + term_out) + infile = open(term_in, "r") + outfile = open(term_out, "w") + sched = json.loads(infile.read()) + #print sched + by_date = {} + for C in sched: + parts = C['date'].split("-") + start = parts[0] + codes = C['code'].split(' ') + dept = codes[0] + if dept in ['JLE','JFT','CWE']: + continue + if re.search('TBA',start): continue + try: + startd = parser.parse(start) + except Exception as e: + print(e, "\nproblem parsing ", start) + #print startd + if not startd in by_date: + by_date[startd] = [] + by_date[startd].append(C) + for X in sorted(by_date.keys()): + #print "Start: " + str(X) + if len(by_date[X]) < 200: + prettydate = X.strftime("%A, %B %d") + print(prettydate + ": " + str(len(by_date[X])) + " courses") + outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n") + for Y in by_date[X]: + #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + print(Y) + #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n") + outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n") + + + + online_courses = {} def prep_online_courses_df(): global online_courses diff --git a/pipelines.py b/pipelines.py index cfe930f..7010a15 100644 --- a/pipelines.py +++ b/pipelines.py @@ -2074,7 +2074,7 @@ def expand_old_semesters(): input('press return to continue.') # Input: xxxx_sched.json. Output: xxxx_latestarts.txt -def list_latestarts(term): +def list_latestarts(term="sp23"): show_summary = 1 diff --git a/tasks.py b/tasks.py index 5f8e28b..6915a4d 100644 --- a/tasks.py +++ b/tasks.py @@ -30,6 +30,25 @@ from pipelines import header, url, fetch #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email +######### +######### BOOKSTORE +######### +######### + +def scrape_bookstore(): + big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761" + bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() ) + + one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT + + another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at: + # [""0""].courseSectionDTO[""0""].courseMaterialResultsList + # + # and also: + # + # [""0""].courseSectionDTO[""0""].sectionAdoptionDTO.materialAdoptions + + def survey_answer(q=0): diff --git a/util.py b/util.py index 122e929..1a87041 100644 --- a/util.py +++ b/util.py @@ -42,7 +42,13 @@ def clean_title(st): if len(st)>50: return st[:50]+'...' return st +def int_or_zero(x): + if x == None: return 0 + else: return int(x) +def float_or_zero(x): + if x == None: return 0 + else: return float(x) def match59(x): if x['links']['context']==7959: return True