cleaning - courses, pipelines

This commit is contained in:
phowell 2023-03-27 17:17:32 -07:00
parent 89165b6a09
commit 55290f9fa1
5 changed files with 74 additions and 71 deletions

View File

@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os
import pandas as pd import pandas as pd
from dateutil import parser from dateutil import parser
from datetime import datetime from datetime import datetime
from util import print_table from util import print_table, int_or_zero, float_or_zero
from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
from pipelines import sems from pipelines import sems
@ -19,14 +19,6 @@ stem_course_id = '11015' # TODO
######### #########
######### #########
def int_or_zero(x):
    """Convert *x* to ``int``, treating ``None`` as zero.

    Args:
        x: ``None`` or any value accepted by ``int()``.

    Returns:
        ``0`` when *x* is ``None``, otherwise ``int(x)``.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when *x* cannot be
            converted.
    """
    # Identity test: ``== None`` would call a user-defined ``__eq__`` and can
    # wrongly treat a non-None object as missing.
    if x is None:
        return 0
    return int(x)
def float_or_zero(x):
    """Convert *x* to ``float``, treating ``None`` as zero.

    Args:
        x: ``None`` or any value accepted by ``float()``.

    Returns:
        The integer ``0`` when *x* is ``None`` (kept as-is for existing
        callers), otherwise ``float(x)``.

    Raises:
        ValueError, TypeError: Propagated from ``float()`` when *x* cannot be
            converted.
    """
    # Identity test: ``== None`` would call a user-defined ``__eq__`` and can
    # wrongly treat a non-None object as missing.
    if x is None:
        return 0
    return float(x)
# Gott 1 Bootcamp - report on who completed it. # Gott 1 Bootcamp - report on who completed it.
def get_gott1_passers(): def get_gott1_passers():
course = '1561' course = '1561'
@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"):
print(r.text) print(r.text)
#########
######### BOOKSTORE
#########
#########
def scrape_bookstore():
    """Load the cached bookstore course list; the scraper itself is a stub.

    Reads ``cache/bookstore_courses.json`` and keeps the bookstore service
    URLs of interest as locals. Nothing is requested over the network and
    nothing is returned.

    Raises:
        OSError: If the cache file cannot be opened.
        json.JSONDecodeError: If the cache file is not valid JSON.
    """
    # Course-list endpoint (store and term ids are in the query string).
    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"

    # Read through a context manager so the file handle is closed promptly
    # instead of being left for the garbage collector.
    with open('cache/bookstore_courses.json', 'r') as cache_file:
        bcu_cached = json.load(cache_file)

    # NOTE(review): the two section URLs below are identical as written;
    # presumably the parameters that select a section were dropped - confirm.
    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT

    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
    #
    # and also:
    #
    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
# todo: where does the most recent schedule come from?
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term="sp23"):
    """Report the late-starting courses of one semester.

    Reads ``cache/<term>_sched.json`` (a JSON list of course records), groups
    the records by the start date parsed from the part of each record's
    ``date`` field before the first ``-``, and, for every start date shared
    by fewer than 200 courses, prints the date with its courses and writes
    the same summary to ``cache/<term>_latestarts.txt``.

    Records whose department (first word of ``code``) is JLE, JFT or CWE,
    whose start contains ``TBA``, or whose start cannot be parsed are left
    out.

    Args:
        term: Short semester code used to build both file names (for example
            ``"sp18"``). Defaults to ``"sp23"``, the value that used to be
            hard-coded.

    Raises:
        OSError: If the schedule file cannot be read or the report cannot be
            written.
        KeyError: If a record lacks one of the ``date``, ``code``, ``crn``,
            ``teacher`` or ``type`` keys that are read.
    """
    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)

    with open(term_in, "r") as infile:
        sched = json.load(infile)

    by_date = {}
    for course in sched:
        start = course['date'].split("-")[0]
        dept = course['code'].split(' ')[0]
        if dept in ('JLE', 'JFT', 'CWE'):
            continue
        if 'TBA' in start:
            continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            # Skip the record: falling through would reuse the previous
            # course's date, or fail on an unbound name for the first record.
            continue
        by_date.setdefault(startd, []).append(course)

    with open(term_out, "w") as outfile:
        for startd in sorted(by_date):
            courses = by_date[startd]
            # A date shared by 200 or more courses is not reported.
            if len(courses) >= 200:
                continue
            prettydate = startd.strftime("%A, %B %d")
            summary = prettydate + ": " + str(len(courses)) + " courses"
            print(summary)
            outfile.write(summary + "\n")
            for entry in courses:
                print(entry)
                outfile.write("\t" + entry['code'] + " " + entry['crn'] + "\t" + entry['teacher'] + "\t" + entry['type'] + "\t" + "\n")
# All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids. # All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids.
def users_in_semester(): def users_in_semester():

View File

@ -6,6 +6,53 @@
# todo: where does the most recent schedule come from?
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term="sp23"):
    """Report the late-starting courses of one semester.

    Reads ``cache/<term>_sched.json`` (a JSON list of course records), groups
    the records by the start date parsed from the part of each record's
    ``date`` field before the first ``-``, and, for every start date shared
    by fewer than 200 courses, prints the date with its courses and writes
    the same summary to ``cache/<term>_latestarts.txt``.

    Records whose department (first word of ``code``) is JLE, JFT or CWE,
    whose start contains ``TBA``, or whose start cannot be parsed are left
    out.

    Args:
        term: Short semester code used to build both file names (for example
            ``"sp18"``). Defaults to ``"sp23"``, the value that used to be
            hard-coded.

    Raises:
        OSError: If the schedule file cannot be read or the report cannot be
            written.
        KeyError: If a record lacks one of the ``date``, ``code``, ``crn``,
            ``teacher`` or ``type`` keys that are read.
    """
    term_in = "cache/" + term + "_sched.json"
    term_out = "cache/" + term + "_latestarts.txt"
    print("Writing output to " + term_out)

    with open(term_in, "r") as infile:
        sched = json.load(infile)

    by_date = {}
    for course in sched:
        start = course['date'].split("-")[0]
        dept = course['code'].split(' ')[0]
        if dept in ('JLE', 'JFT', 'CWE'):
            continue
        if 'TBA' in start:
            continue
        try:
            startd = parser.parse(start)
        except Exception as e:
            print(e, "\nproblem parsing ", start)
            # Skip the record: falling through would reuse the previous
            # course's date, or fail on an unbound name for the first record.
            continue
        by_date.setdefault(startd, []).append(course)

    with open(term_out, "w") as outfile:
        for startd in sorted(by_date):
            courses = by_date[startd]
            # A date shared by 200 or more courses is not reported.
            if len(courses) >= 200:
                continue
            prettydate = startd.strftime("%A, %B %d")
            summary = prettydate + ": " + str(len(courses)) + " courses"
            print(summary)
            outfile.write(summary + "\n")
            for entry in courses:
                print(entry)
                outfile.write("\t" + entry['code'] + " " + entry['crn'] + "\t" + entry['teacher'] + "\t" + entry['type'] + "\t" + "\n")
online_courses = {} online_courses = {}
def prep_online_courses_df(): def prep_online_courses_df():
global online_courses global online_courses

View File

@ -2074,7 +2074,7 @@ def expand_old_semesters():
input('press return to continue.') input('press return to continue.')
# Input: xxxx_sched.json. Output: xxxx_latestarts.txt # Input: xxxx_sched.json. Output: xxxx_latestarts.txt
def list_latestarts(term): def list_latestarts(term="sp23"):
show_summary = 1 show_summary = 1

View File

@ -30,6 +30,25 @@ from pipelines import header, url, fetch
#from localcache import local_data_folder, sqlite_file, db, user_goo_to_email #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email
#########
######### BOOKSTORE
#########
#########
def scrape_bookstore():
    """Load the cached bookstore course list; the scraper itself is a stub.

    Reads ``cache/bookstore_courses.json`` and keeps the bookstore service
    URLs of interest as locals. Nothing is requested over the network and
    nothing is returned.

    Raises:
        OSError: If the cache file cannot be opened.
        json.JSONDecodeError: If the cache file is not valid JSON.
    """
    # Course-list endpoint (store and term ids are in the query string).
    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"

    # Read through a context manager so the file handle is closed promptly
    # instead of being left for the garbage collector.
    with open('cache/bookstore_courses.json', 'r') as cache_file:
        bcu_cached = json.load(cache_file)

    # NOTE(review): the two section URLs below are identical as written;
    # presumably the parameters that select a section were dropped - confirm.
    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT

    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
    #
    # and also:
    #
    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
def survey_answer(q=0): def survey_answer(q=0):

View File

@ -42,7 +42,13 @@ def clean_title(st):
if len(st)>50: return st[:50]+'...' if len(st)>50: return st[:50]+'...'
return st return st
def int_or_zero(x):
    """Convert *x* to ``int``, treating ``None`` as zero.

    Args:
        x: ``None`` or any value accepted by ``int()``.

    Returns:
        ``0`` when *x* is ``None``, otherwise ``int(x)``.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when *x* cannot be
            converted.
    """
    # Identity test: ``== None`` would call a user-defined ``__eq__`` and can
    # wrongly treat a non-None object as missing.
    if x is None:
        return 0
    return int(x)
def float_or_zero(x):
    """Convert *x* to ``float``, treating ``None`` as zero.

    Args:
        x: ``None`` or any value accepted by ``float()``.

    Returns:
        The integer ``0`` when *x* is ``None`` (kept as-is for existing
        callers), otherwise ``float(x)``.

    Raises:
        ValueError, TypeError: Propagated from ``float()`` when *x* cannot be
            converted.
    """
    # Identity test: ``== None`` would call a user-defined ``__eq__`` and can
    # wrongly treat a non-None object as missing.
    if x is None:
        return 0
    return float(x)
def match59(x): def match59(x):
if x['links']['context']==7959: return True if x['links']['context']==7959: return True