req.txt and ezproxy script

parent 36008e461b
commit e7a80d8880

content.py (+32)
@@ -1248,6 +1248,37 @@ def search_embeddings():
        print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')


def repair_ezproxy_links():
    from localcache2 import pages_in_term

    # get all pages in term
    all_pages = pages_in_term()

    # c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
    for p in all_pages:
        course = p[1]
        title = p[4]
        url = p[5]
        body = p[7]
        # print(body)
        try:
            #s = re.search('''["']https:\/\/ezproxy\.gavilan\.edu\/login\?url=(.*)["']''',body)
            a = re.search(r'Online Library Services', title)
            if a:
                continue
            s = re.findall(r'\n.*ezproxy.*\n', body)
            if s:
                print(course, title, url)
                print(" ", s, "\n")  # s.group())
        except Exception as e:
            #print(f"Skipped: {title}, {e}")
            pass


if __name__ == "__main__":

    print('')
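As committed, the function above only reports pages whose bodies mention ezproxy; the commented-out pattern suggests the eventual repair is to route library links through https://ezproxy.gavilan.edu/login?url=... . A possible sketch of that rewrite step, under stated assumptions: DB_HOSTS and proxy_wrap are hypothetical names, not part of this commit.

import re

# Hypothetical: hosts whose links should be sent through the proxy.
DB_HOSTS = ['search.ebscohost.com', 'www.jstor.org']

def proxy_wrap(body):
    # Prefix matching hrefs with the ezproxy login URL, leaving links
    # that already point at ezproxy.gavilan.edu untouched.
    hosts = '|'.join(re.escape(h) for h in DB_HOSTS)
    pattern = rf'href="(https?://(?:{hosts})[^"]*)"'
    def fix(m):
        url = m.group(1)
        if 'ezproxy.gavilan.edu' in url:
            return m.group(0)
        return f'href="https://ezproxy.gavilan.edu/login?url={url}"'
    return re.sub(pattern, fix, body)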
@@ -1267,6 +1298,7 @@ if __name__ == "__main__":
        14: ['do a vector search', search_embeddings],
        15: ['test priority', test_priority],
        16: ['test embed', test_embed],
        17: ['repair ezproxy links', repair_ezproxy_links],
        }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
localcache2.py (new file, +204)

@@ -0,0 +1,204 @@
# Local data, saving and manipulating

import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate

#########
######### LOCAL DB
#########

CON = None
CURSOR = None

def db():
    global CON, CURSOR
    CON = psycopg2.connect(database="db",
                           host="192.168.1.6",
                           user="postgres",
                           password="rolley34",
                           port="5432")

    CURSOR = CON.cursor()
    return CON, CURSOR
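For one-off queries against the same database, psycopg2's context managers can handle transaction commit/rollback and cursor cleanup; a minimal sketch reusing db() above (quick_query is a hypothetical helper, not part of this module):

def quick_query(sql, params=None):
    connection, _ = db()
    # `with connection` commits on success and rolls back on error
    # (the connection itself stays open); the cursor block closes the cursor.
    with connection, connection.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()

# e.g. quick_query("SELECT count(*) FROM canvas.courses")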
'''
# Help the next function to upload new users directly to conf database on gavilan.
def employees_refresh_flex(data):
    try:
        data['a'] = 'set/newuser'
        data['sis_user_id'] = data['sis_user_id'][3:]
        print("\nUploading this: \n")
        print(json.dumps(data, indent=2))
        print("\n")
        a = input("Continue (y) or skip (n) ? ")
        if a == 'y':
            # This is what I was missing..........
            # req.add_header("Content-type", "application/x-www-form-urlencoded")
            r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
            print(r3.text)
            #print(r3.headers)
    except Exception as ex:
        print("Failed on: %s\nErr: %s" % (str(data), str(ex)))


# Everyone in iLearn DB with an xyz@gavilan.edu email address.
def all_gav_employees():
    (connection, cursor) = db()
    connection.row_factory = dict_factory
    q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
        h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
        p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
        JOIN comm_channel AS h ON u.id=h.user_id
        JOIN pseudonym AS p ON p.user_id=u.id
        WHERE h.address LIKE '%@gavilan.edu'
        ORDER BY u.sortablename"""
    cursor = connection.cursor()
    cursor.execute(q)
    everyone = cursor.fetchall()
    everyone_set = set()
    for E in everyone:
        try:
            everyone_set.add(E['address'].lower())
        except Exception as e:
            print("Exception: %s\nwith: %s" % (str(e), str(E)))

    oo = open('cache/temp1.txt', 'w')
    oo.write(json.dumps(list(everyone_set), indent=2))
    existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
    ex = json.loads(existing.text)
    already_enrolled = set()
    for usr in ex['users']:
        try:
            #already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
            already_enrolled.add(usr['email'].lower())
        except Exception as e:
            print("Exception: %s\nWith: %s" % (str(e), str(usr)))

    oo.write("\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ')
    oo.write(json.dumps(list(already_enrolled), indent=2))

    # conf_users wants: goo, email, name, active
    # and emails have random capitalization
    # name is First Last, and sometimes with Middle in there.
    #

    # using sets: to_enroll = [ x for x in students if x not in already_enrolled ]
    new_emp = [x for x in everyone_set if x not in already_enrolled]

    # take the all_employee list, filter -> anyone who's in 'existing' is removed

    # funcy.where( lambda x: x['email'] == ae[4] , existing )

    #new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
    #new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu')) #ae['email'] ))
    print(new_emp)
    oo.write("\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ')
    oo.write(json.dumps(list(new_emp), indent=2))

    # Now, iLearn db (everyone)... find the rows that match the email addresses
    # that we've decided we need to add (new_emp)

    #print(everyone)
    #print( "searching for %s" % j )
    #print( "searched for %s, found: %s" % (j, str(to_add) ))
    #print("\nUploading...\n")
    for j in new_emp:
        #j = new_emp[0]
        print(j)
        to_add = list(funcy.where(everyone, address=j))
        if to_add:
            employees_refresh_flex(to_add[0])
        else:
            print("Didn't find an entry for that account.")
    print("done uploading")

'''
def teachers_by_term(TERM="202430"):
    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.created_at, c.start_at, c.workflow_state, e.last_attended_at,
        u.id, u.sortable_name, u.created_at FROM canvas.courses AS c
        JOIN canvas.enrollments AS e ON e.course_id=c.id
        JOIN canvas.users AS u ON u.id=e.user_id
        WHERE c.sis_source_id LIKE '{TERM}%' AND e.type='TeacherEnrollment' ORDER BY u.sortable_name, c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_teachers = cursor.fetchall()

    table = [[t[9], t[1], t[3], t[6]] for t in all_teachers]
    print(tabulate(table))

    #for t in all_teachers:
    #    print("\t".join( [str(x) for x in [t[9],t[1],t[3],t[6]]]))
    return all_teachers

def courses_in_term(TERM="202430"):
    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.workflow_state FROM canvas.courses AS c
        WHERE c.sis_source_id LIKE '{TERM}%' ORDER BY c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_courses = cursor.fetchall()

    #table = [ [t[9],t[1],t[3],t[6]] for t in all_teachers]
    print(tabulate(all_courses))

def pages_in_term(TERM="202430"):
    q = f"""SELECT c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
        FROM canvas.courses c
        JOIN canvas.wiki_pages wp ON wp.context_id=c.id
        WHERE c.sis_source_id LIKE '{TERM}%'
        ORDER BY c.sis_source_id, wp.title;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_pages = cursor.fetchall()
    #print(tabulate(all_pages))
    return all_pages

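The tuple layout follows the SELECT column order, which is what content.py depends on when it indexes p[1], p[4], p[5], and p[7]; unpacking by position makes that contract explicit. A small usage sketch:

for (course_id, course_code, sis_source_id, wp_id,
     title, url, course_name, body) in pages_in_term("202430"):
    print(course_code, title, url)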
if __name__ == "__main__":

    print('')
    options = {
        1: ['all teachers', teachers_by_term],
        2: ['courses in term', courses_in_term],
        3: ['pages in term', pages_in_term]
        }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the function in the options dict
    options[int(resp)][1]()
pipelines.py (+70)

@@ -13,6 +13,12 @@ from deepdiff import DeepDiff
from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
from canvas_secrets import instructure_url, instructure_username, instructure_private_key

import os, asyncio
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator



"""
@@ -432,6 +438,64 @@ def get_enrlmts_for_user(user,enrollments):
################


# Get canvas data 2024 style
def canvas_data_2024_run():
    print("Updating all tables.")
    asyncio.run(canvas_data_2024())
    print("Done with all tables.")


async def canvas_data_2024():

    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" trying to update {table} ")
                try:
                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")


# Set up canvas data 2024 style
def setup_canvas_data_2024_run():
    print("Setting up all tables.")
    asyncio.run(setup_canvas_data_2024())
    print("Done with all tables.")


async def setup_canvas_data_2024():

    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" {table}")
                try:
                    await SQLReplicator(session, db_connection).initialize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")


# Get something from Canvas Data
def do_request(path):
    #Set up the request pieces
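When one table's sync fails, the same client calls can be rerun for just that table instead of looping over all of them; a minimal sketch reusing the env vars, connection string, and DAP API from canvas_data_2024 above (sync_one_table is a hypothetical helper, not part of this commit):

import asyncio, os
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator

async def sync_one_table(table):
    # Same credentials and target database as canvas_data_2024().
    credentials = Credentials.create(client_id=os.environ["DAP_CLIENT_ID"],
                                     client_secret=os.environ["DAP_CLIENT_SECRET"])
    connection_string = "postgresql://postgres:rolley34@192.168.1.6/db"
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(os.environ["DAP_API_URL"], credentials) as session:
            await SQLReplicator(session, db_connection).synchronize("canvas", table)

# e.g. asyncio.run(sync_one_table("enrollments"))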
@@ -2199,9 +2263,11 @@ if __name__ == "__main__":
        5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
        6: ['Canvas data: interactive sync', interactive ],
        7: ['Canvas data: automated sync', sync_non_interactive ],
        8: ['Get canvas data 2024 style', canvas_data_2024_run ],          # added; old 8 ('Scrape schedule from ssb') renumbered to 16
        9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],  # added; old 9 ('Test ssb calls with python') renumbered to 15
        16: ['Scrape schedule from ssb', scrape_schedule_multi ],
        14: ['Generate latestart schedule', list_latestarts ],
        15: ['Test ssb calls with python', scrape_schedule_py ],
        10: ['schedule to db', scrape_for_db ],
        11: ['clean argos draft schedule file', argos_data_from_cvc],
        12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
req.txt

@@ -1,30 +1,41 @@
absl-py==2.1.0
aiofiles==23.2.1
aiohttp==3.9.3
aiohttp-retry==2.8.3
aiomysql==0.2.0
aiosignal==1.3.1
annotated-types==0.6.0
annoy==1.17.3
arrow==1.3.0
async-timeout==4.0.3
asyncpg==0.29.0
attrs==23.2.0
Automat==22.10.0
Babel==2.14.0
bcrypt==4.1.2
beautifulsoup4==4.12.3
bidict==0.22.1
blinker==1.7.0
blis==0.7.11
cachetools==5.3.2
catalogue==2.0.10
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6
confection==0.1.4
constantly==23.10.4
contourpy==1.2.0
courlan==1.0.0
cryptography==42.0.2
cssselect==1.2.0
cycler==0.12.1
cymem==2.0.8
dateparser==1.2.0
deepdiff==6.7.1
dm-tree==0.1.8
docxcompose==1.4.0
docxtpl==0.16.7
durable-rules==2.0.28

@@ -46,22 +57,30 @@ google-auth-oauthlib==1.2.0
googleapis-common-protos==1.62.0
greenlet==3.0.3
h11==0.14.0
h5py==3.10.0
html2markdown==0.1.7
htmldate==1.7.0
httplib2==0.22.0
huggingface-hub==0.20.3
hyperlink==21.0.0
ics==0.7.2
idna==3.6
incremental==22.10.0
instructure-dap-client==0.3.18
itemadapter==0.8.0
itemloaders==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.3
jmespath==1.0.1
joblib==1.3.2
json_strong_typing==0.3.2
jsondiff==2.0.0
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jusText==3.0.0
keras==3.0.4
kiwisolver==1.4.5
kneed==0.8.5
langcodes==3.3.0
lark==1.1.9
linkify-it-py==2.0.2

@@ -76,6 +95,8 @@ mdurl==0.1.2
minizinc==0.9.0
mpmath==1.3.0
multidict==6.0.4
murmurhash==1.0.10
namex==0.0.7
networkx==3.2.1
nltk==3.8.1
numpy==1.26.3

@@ -89,23 +110,33 @@ paho-mqtt==1.6.1
pampy==0.3.0
pandas==2.2.0
paramiko==3.4.0
parsel==1.8.1
path-dict==4.0.0
pathlib==1.0.1
patsy==0.5.6
pdfminer==20191125
pdfminer.six==20231228
piexif==1.1.3
pillow==10.2.0
plotly==5.18.0
preshed==3.0.9
Protego==0.3.0
protobuf==4.25.2
psycopg2==2.9.9
pyarrow==15.0.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.21
pycryptodome==3.20.0
pydantic==2.6.1
pydantic_core==2.16.2
PyDispatcher==2.0.7
pygame==2.5.2
Pygments==2.17.2
PyJWT==2.8.0
PyMySQL==1.1.0
PyNaCl==1.5.0
pyOpenSSL==24.0.0
pypandoc==1.12
pyparsing==3.1.1
PyPDF2==3.0.1

@@ -118,10 +149,12 @@ python-socketio==5.11.0
pytz==2024.1
pywin32==306
PyYAML==6.0.1
queuelib==1.6.2
redis==5.0.1
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
requests-file==2.0.0
requests-oauthlib==1.3.1
rich==13.7.0
rpds-py==0.17.1

@@ -130,9 +163,11 @@ safetensors==0.4.2
schedule==1.2.1
scikit-learn==1.4.0
scipy==1.12.0
Scrapy==2.11.0
selenium==4.17.2
sentence-transformers==2.3.1
sentencepiece==0.1.99
service-identity==24.1.0
simple-websocket==1.0.0
simpy==4.1.1
six==1.16.0

@@ -140,14 +175,23 @@ smart-open==6.4.0
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.5
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.25
srsly==2.4.8
statsmodels==0.14.1
striprtf==0.0.26
sympy==1.12
tabulate==0.9.0
TatSu==5.11.3
tenacity==8.2.3
textdistance==4.6.1
textual==0.48.2
thinc==8.2.2
threadpoolctl==3.2.0
tld==0.13
tldextract==5.1.1
tokenizers==0.15.1
tomd==0.1.3
toolz==0.12.1

@@ -157,6 +201,9 @@ trafilatura==1.7.0
transformers==4.37.2
trio==0.24.0
trio-websocket==0.11.1
Twisted==22.10.0
twisted-iocpsupport==1.0.4
typer==0.9.0
types-aiofiles==23.2.0.20240106
types-python-dateutil==2.8.19.20240106
typing_extensions==4.9.0

@@ -165,9 +212,13 @@ tzlocal==5.2
uc-micro-py==1.0.2
uritemplate==4.1.1
urllib3==2.2.0
w3lib==2.1.2
wasabi==1.1.2
weasel==0.3.4
Werkzeug==3.0.1
Whoosh==2.7.4
wsproto==1.2.0
xlwt==1.3.0
yarl==1.9.4
yattag==1.15.2
zope.interface==6.1