cleaning - courses, pipelines
parent 89165b6a09
commit 55290f9fa1

courses.py (71 lines changed)
@@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os
 import pandas as pd
 from dateutil import parser
 from datetime import datetime
-from util import print_table
+from util import print_table, int_or_zero, float_or_zero
 
 from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
 from pipelines import sems
@@ -19,14 +19,6 @@ stem_course_id = '11015' # TODO
 #########
 #########
 
-def int_or_zero(x):
-    if x == None: return 0
-    else: return int(x)
-
-def float_or_zero(x):
-    if x == None: return 0
-    else: return float(x)
-
 # Gott 1 Bootcamp - report on who completed it.
 def get_gott1_passers():
     course = '1561'
@@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"):
     print(r.text)
 
 
-#########
-######### BOOKSTORE
-#########
-#########
-
-def scrape_bookstore():
-    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
-    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )
-
-    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT
-
-    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
-    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
-    #
-    # and also:
-    #
-    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
-
-# todo: where does the most recent schedule come from?
-
-# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
-def list_latestarts():
-    #term = input("Name of current semester file? (ex: sp18) ")
-    term = "sp23" # sems[0]
-
-    term_in = "cache/" + term + "_sched.json"
-    term_out = "cache/" + term + "_latestarts.txt"
-    print("Writing output to " + term_out)
-    infile = open(term_in, "r")
-    outfile = open(term_out, "w")
-    sched = json.loads(infile.read())
-    #print sched
-    by_date = {}
-    for C in sched:
-        parts = C['date'].split("-")
-        start = parts[0]
-        codes = C['code'].split(' ')
-        dept = codes[0]
-        if dept in ['JLE','JFT','CWE']:
-            continue
-        if re.search('TBA',start): continue
-        try:
-            startd = parser.parse(start)
-        except Exception as e:
-            print(e, "\nproblem parsing ", start)
-        #print startd
-        if not startd in by_date:
-            by_date[startd] = []
-        by_date[startd].append(C)
-    for X in sorted(by_date.keys()):
-        #print "Start: " + str(X)
-        if len(by_date[X]) < 200:
-            prettydate = X.strftime("%A, %B %d")
-            print(prettydate + ": " + str(len(by_date[X])) + " courses")
-            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
-            for Y in by_date[X]:
-                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
-                print(Y)
-                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
-                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
-
 
 # All students enrolled in a class in the given semester. Simpler version of below. Return SET of course_ids.
 def users_in_semester():

@@ -6,6 +6,53 @@
 
 
 
+
+# todo: where does the most recent schedule come from?
+
+# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
+def list_latestarts():
+    #term = input("Name of current semester file? (ex: sp18) ")
+    term = "sp23" # sems[0]
+
+    term_in = "cache/" + term + "_sched.json"
+    term_out = "cache/" + term + "_latestarts.txt"
+    print("Writing output to " + term_out)
+    infile = open(term_in, "r")
+    outfile = open(term_out, "w")
+    sched = json.loads(infile.read())
+    #print sched
+    by_date = {}
+    for C in sched:
+        parts = C['date'].split("-")
+        start = parts[0]
+        codes = C['code'].split(' ')
+        dept = codes[0]
+        if dept in ['JLE','JFT','CWE']:
+            continue
+        if re.search('TBA',start): continue
+        try:
+            startd = parser.parse(start)
+        except Exception as e:
+            print(e, "\nproblem parsing ", start)
+        #print startd
+        if not startd in by_date:
+            by_date[startd] = []
+        by_date[startd].append(C)
+    for X in sorted(by_date.keys()):
+        #print "Start: " + str(X)
+        if len(by_date[X]) < 200:
+            prettydate = X.strftime("%A, %B %d")
+            print(prettydate + ": " + str(len(by_date[X])) + " courses")
+            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
+            for Y in by_date[X]:
+                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
+                print(Y)
+                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
+                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
+
+
+
+
 online_courses = {}
 def prep_online_courses_df():
     global online_courses
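For orientation, this is roughly the record shape the relocated list_latestarts() expects from cache/<term>_sched.json, inferred from the keys it reads; the sample values below are invented for illustration and are not taken from the repository's cache:

# Hypothetical schedule record; only the keys mirror what list_latestarts() reads.
sample_sched = [
    {
        "code": "MATH 110",                # dept prefix is checked against JLE/JFT/CWE
        "crn": "30123",
        "teacher": "Staff",
        "type": "ONLINE",
        "date": "02/13/2023-06/09/2023",   # text before the "-" is parsed as the start date
    },
]

Start dates that parse are bucketed in by_date, and any date with fewer than 200 sections is written out as a late-start group.
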
@@ -2074,7 +2074,7 @@ def expand_old_semesters():
     input('press return to continue.')
 
 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt
-def list_latestarts(term):
+def list_latestarts(term="sp23"):
 
     show_summary = 1
 

tasks.py (19 lines changed)
@@ -30,6 +30,25 @@ from pipelines import header, url, fetch
 
 #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email
 
+#########
+######### BOOKSTORE
+#########
+#########
+
+def scrape_bookstore():
+    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
+    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )
+
+    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT
+
+    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
+    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
+    #
+    # and also:
+    #
+    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
+
+
 
 def survey_answer(q=0):
 
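The two commented paths are the only hint here at the shape of the bookstore response. A rough sketch of walking them from an already-parsed section payload could look like the following; the helper names, the tolerance for "0" as either a list position or a dict key, and the empty-list fallbacks are assumptions, not part of the commit:

# Sketch only: assumes `payload` is the parsed JSON behind a section request such
# as another_section; everything beyond the two commented paths is guessed.
def _zero(node):
    # the comments write the index as ["0"]; accept a list position or a "0" key
    if isinstance(node, dict):
        return node.get("0")
    if isinstance(node, (list, tuple)) and node:
        return node[0]
    return None

def required_materials(payload):
    course = _zero(payload)
    section = _zero(course.get("courseSectionDTO")) if isinstance(course, dict) else None
    if not isinstance(section, dict):
        return []
    direct = section.get("courseMaterialResultsList") or []
    adoptions = (section.get("sectionAdoptionDTO") or {}).get("materialAdoptions") or []
    return list(direct) + list(adoptions)

For example, required_materials(section_json) would return the combined material lists once section_json holds one section's parsed response.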