cleaning - courses, pipelines
parent 89165b6a09
commit 55290f9fa1

courses.py (71 lines changed)
@@ -2,7 +2,7 @@ import json, re, requests, codecs, sys, time, funcy, os
 import pandas as pd
 from dateutil import parser
 from datetime import datetime
-from util import print_table
+from util import print_table, int_or_zero, float_or_zero
 
 from pipelines import fetch, fetch_stream, getSemesterSchedule, fetch_collapse, header, url, shortToLongSem
 from pipelines import sems
@@ -19,14 +19,6 @@ stem_course_id = '11015' # TODO
 #########
 #########
 
-def int_or_zero(x):
-    if x == None: return 0
-    else: return int(x)
-
-def float_or_zero(x):
-    if x == None: return 0
-    else: return float(x)
-
 # Gott 1 Bootcamp - report on who completed it.
 def get_gott1_passers():
     course = '1561'
@@ -224,67 +216,6 @@ def change_course_ann_homepage(id="10458"):
     print(r.text)
 
 
-#########
-######### BOOKSTORE
-#########
-#########
-
-def scrape_bookstore():
-    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
-    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )
-
-    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT
-
-    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
-    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
-    #
-    # and also:
-    #
-    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
-
-# todo: where does the most recent schedule come from?
-
-# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
-def list_latestarts():
-    #term = input("Name of current semester file? (ex: sp18) ")
-    term = "sp23" # sems[0]
-
-    term_in = "cache/" + term + "_sched.json"
-    term_out = "cache/" + term + "_latestarts.txt"
-    print("Writing output to " + term_out)
-    infile = open(term_in, "r")
-    outfile = open(term_out, "w")
-    sched = json.loads(infile.read())
-    #print sched
-    by_date = {}
-    for C in sched:
-        parts = C['date'].split("-")
-        start = parts[0]
-        codes = C['code'].split(' ')
-        dept = codes[0]
-        if dept in ['JLE','JFT','CWE']:
-            continue
-        if re.search('TBA',start): continue
-        try:
-            startd = parser.parse(start)
-        except Exception as e:
-            print(e, "\nproblem parsing ", start)
-        #print startd
-        if not startd in by_date:
-            by_date[startd] = []
-        by_date[startd].append(C)
-    for X in sorted(by_date.keys()):
-        #print "Start: " + str(X)
-        if len(by_date[X]) < 200:
-            prettydate = X.strftime("%A, %B %d")
-            print(prettydate + ": " + str(len(by_date[X])) + " courses")
-            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
-            for Y in by_date[X]:
-                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
-                print(Y)
-                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
-                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
-
 
 # All students enrolled in a class in the given semester. Simpler verson of below. Return SET of course_ids.
 def users_in_semester():
pipelines.py

@@ -6,6 +6,53 @@
 
 
 
+
+# todo: where does the most recent schedule come from?
+
+# Input: xxxx_sched.json. Output: xxxx_latestarts.txt
+def list_latestarts():
+    #term = input("Name of current semester file? (ex: sp18) ")
+    term = "sp23" # sems[0]
+
+    term_in = "cache/" + term + "_sched.json"
+    term_out = "cache/" + term + "_latestarts.txt"
+    print("Writing output to " + term_out)
+    infile = open(term_in, "r")
+    outfile = open(term_out, "w")
+    sched = json.loads(infile.read())
+    #print sched
+    by_date = {}
+    for C in sched:
+        parts = C['date'].split("-")
+        start = parts[0]
+        codes = C['code'].split(' ')
+        dept = codes[0]
+        if dept in ['JLE','JFT','CWE']:
+            continue
+        if re.search('TBA',start): continue
+        try:
+            startd = parser.parse(start)
+        except Exception as e:
+            print(e, "\nproblem parsing ", start)
+        #print startd
+        if not startd in by_date:
+            by_date[startd] = []
+        by_date[startd].append(C)
+    for X in sorted(by_date.keys()):
+        #print "Start: " + str(X)
+        if len(by_date[X]) < 200:
+            prettydate = X.strftime("%A, %B %d")
+            print(prettydate + ": " + str(len(by_date[X])) + " courses")
+            outfile.write(prettydate + ": " + str(len(by_date[X])) + " courses" + "\n")
+            for Y in by_date[X]:
+                #print "\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher']
+                print(Y)
+                #outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] +"\n")
+                outfile.write("\t" + Y['code'] + " " + Y['crn'] + "\t" + Y['teacher'] + "\t" + Y['type'] + "\t" + "\n")
+
+
+
+
 online_courses = {}
 def prep_online_courses_df():
     global online_courses
@@ -2074,7 +2074,7 @@ def expand_old_semesters():
     input('press return to continue.')
 
 # Input: xxxx_sched.json. Output: xxxx_latestarts.txt
-def list_latestarts(term):
+def list_latestarts(term="sp23"):
 
     show_summary = 1
 
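A note on the input format: list_latestarts only touches a handful of keys in each cache/xxxx_sched.json record (date, code, crn, teacher, type), splitting date on "-" to get the start day and code on a space to get the department. A minimal sketch of a record and a run follows; the field values, and the assumption that the function lives in pipelines.py, are made up for illustration.

import json, os
from pipelines import list_latestarts  # assumes the hunk above landed in pipelines.py

# Hypothetical schedule entry; the key names come from list_latestarts, the values are invented.
sample_sched = [{
    "code": "MATH 101",     # first token is the dept; JLE/JFT/CWE are skipped
    "crn": "30123",
    "teacher": "Staff",
    "type": "ONLN",
    "date": "02/13-05/26",  # text before "-" is parsed with dateutil; "TBA" rows are skipped
}]

os.makedirs("cache", exist_ok=True)
with open("cache/sp23_sched.json", "w") as f:   # term is hard-coded to "sp23" in the function
    json.dump(sample_sched, f)

list_latestarts()   # prints the grouped start dates and writes cache/sp23_latestarts.txt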
tasks.py (19 lines changed)

@@ -30,6 +30,25 @@ from pipelines import header, url, fetch
 
 #from localcache import local_data_folder, sqlite_file, db, user_goo_to_email
 
+#########
+######### BOOKSTORE
+#########
+#########
+
+def scrape_bookstore():
+    big_courselist_url = "https://svc.bkstr.com/courseMaterial/courses?storeId=10190&termId=100058761"
+    bcu_cached = json.loads( open('cache/bookstore_courses.json','r').read() )
+
+    one_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # NO TEXT
+
+    another_section = "https://svc.bkstr.com/courseMaterial/results?storeId=10190&langId=-1&catalogId=11077&requestType=DDCSBrowse" # 3 REQUIRED at:
+    # ["0"].courseSectionDTO["0"].courseMaterialResultsList
+    #
+    # and also:
+    #
+    # ["0"].courseSectionDTO["0"].sectionAdoptionDTO.materialAdoptions
+
+
 
 
 def survey_answer(q=0):
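As committed, scrape_bookstore is still a stub: it records the bookstore endpoints and the JSON paths where required materials and adoptions show up, but does no parsing yet. A rough sketch of walking those paths on a cached response, assuming the payload really is keyed the way the comments describe (the "0" keys, courseSectionDTO, courseMaterialResultsList, sectionAdoptionDTO.materialAdoptions) and using a hypothetical cache file name:

import json

def bookstore_materials(path='cache/bookstore_section.json'):
    # path is a hypothetical cache of one section's /courseMaterial/results response
    data = json.loads(open(path, 'r').read())
    # follow the paths noted in scrape_bookstore; the real payload may differ
    section = data["0"]["courseSectionDTO"]["0"]
    required = section.get("courseMaterialResultsList", [])
    adoptions = section.get("sectionAdoptionDTO", {}).get("materialAdoptions", [])
    return required, adoptions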
util.py (6 lines changed)

@@ -42,7 +42,13 @@ def clean_title(st):
     if len(st)>50: return st[:50]+'...'
     return st
 
+def int_or_zero(x):
+    if x == None: return 0
+    else: return int(x)
+def float_or_zero(x):
+    if x == None: return 0
+    else: return float(x)
 
 def match59(x):
     if x['links']['context']==7959: return True
 
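The two helpers now shared from util.py just guard against missing values before casting; anything non-None is passed straight to int()/float(), so a non-numeric string would still raise. For example:

from util import int_or_zero, float_or_zero

int_or_zero(None)     # 0
int_or_zero("42")     # 42
float_or_zero(None)   # 0
float_or_zero("3.5")  # 3.5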