req.txt and ezproxy script

This commit is contained in:
Coding with Peter 2024-02-06 07:45:07 -08:00
parent 36008e461b
commit e7a80d8880
4 changed files with 355 additions and 2 deletions

View File

@ -1248,6 +1248,37 @@ def search_embeddings():
print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')
def repairy_ezproxy_links():
    """Report wiki pages in the current term whose body mentions 'ezproxy'.

    Pulls every page via localcache2.pages_in_term() and prints the course
    code, page title, url and the matching line(s) so stale proxy links can
    be repaired by hand. Pages titled 'Online Library Services' are skipped,
    since those legitimately link through ezproxy.
    """
    from localcache2 import pages_in_term
    # Row layout: c.id, c.course_code, c.sis_source_id, wp.id as wp_id,
    #             wp.title, wp.url, c.name, wp.body
    all_pages = pages_in_term()
    for p in all_pages:
        course = p[1]
        title = p[4]
        url = p[5]
        body = p[7]
        # Pages can come back with NULL title/body; guard explicitly instead
        # of relying on the except-pass below to swallow the TypeError.
        if not title or not body:
            continue
        if re.search(r'Online Library Services', title):
            continue
        try:
            # Capture each whole line that mentions ezproxy, for context.
            s = re.findall(r'\n.*ezproxy.*\n', body)
            if s:
                print(course, title, url)
                print(" ", s, "\n")
        except Exception as e:
            # Best-effort scan: skip any page whose body can't be searched.
            #print(f"Skipped: {title}, {e}")
            pass
if __name__ == "__main__":
print ('')
@ -1267,6 +1298,7 @@ if __name__ == "__main__":
14: ['do a vector search', search_embeddings],
15: ['test priority', test_priority],
16: ['test embed', test_embed],
17: ['repair ezproxy links', repairy_ezproxy_links],
}
if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):

204
localcache2.py Normal file
View File

@ -0,0 +1,204 @@
# Local data, saving and manipulating
import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate
#########
######### LOCAL DB
#########
# Module-level connection/cursor cache, populated by db(). Initialized to
# empty strings (not None) — callers obtain fresh handles via db() anyway.
CON = ''
CURSOR = ''
def db():
    """Open a connection to the local Postgres cache and return (conn, cursor).

    Also stores both handles in the module-level CON / CURSOR globals so
    other helpers can reuse them.
    """
    global CON, CURSOR
    # The password was hard-coded; it can now be overridden via the
    # LOCAL_DB_PASSWORD environment variable. The default preserves the
    # historical behavior, so existing setups keep working unchanged.
    CON = psycopg2.connect(database="db",
                           host="192.168.1.6",
                           user="postgres",
                           password=os.environ.get("LOCAL_DB_PASSWORD", "rolley34"),
                           port="5432")
    CURSOR = CON.cursor()
    return CON, CURSOR
# Disabled legacy code, kept inside a module-level string so it never runs:
# helpers that pushed new @gavilan.edu employees into the flex-conference
# database. TODO(review): delete once confirmed obsolete, or move to VCS history.
'''
# Help the next function to upload new users directly to conf database on gavilan.
def employees_refresh_flex(data):
    try:
        data['a'] = 'set/newuser'
        data['sis_user_id'] = data['sis_user_id'][3:]
        print("\nUploading this: \n")
        print(json.dumps(data, indent=2))
        print("\n")
        a = input("Continue (y) or skip (n) ? ")
        if a == 'y':
            # This is what I was missing..........
            # req.add_header("Content-type", "application/x-www-form-urlencoded")
            r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
            print(r3.text)
            #print(r3.headers)
    except Exception as ex:
        print("Failed on: %s\nErr: %s" % (str(data),str(ex)))

# Everyone in iLearn DB with an xyz@gavilan.edu email address.
def all_gav_employees():
    (connection,cursor) = db()
    connection.row_factory = dict_factory
    q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
        h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
        p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
        JOIN comm_channel AS h ON u.id=h.user_id
        JOIN pseudonym AS p ON p.user_id=u.id
        WHERE h.address LIKE "%@gavilan.edu"
        ORDER BY u.sortablename"""
    cursor = connection.cursor()
    cursor.execute(q)
    everyone = cursor.fetchall()
    everyone_set = set()
    for E in everyone:
        try:
            everyone_set.add( E['address'].lower() )
        except Exception as e:
            print("Exception: %s\nwith: %s" % (str(e), str(E)))
    oo = open('cache/temp1.txt','w')
    oo.write(json.dumps(list(everyone_set), indent=2))
    existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
    ex = json.loads( existing.text )
    already_enrolled = set()
    for usr in ex['users']:
        try:
            #already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
            already_enrolled.add( usr['email'].lower() )
        except Exception as e:
            print("Exception: %s\nWith: %s" % (str(e),str(usr)))
    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
    oo.write(json.dumps(list(already_enrolled), indent=2))
    # conf_users wants: goo, email, name, active
    # and emails have random capitalization
    # name is First Last, and sometimes with Middle in there.
    #
    # using sets: to_enroll = [ x for x in students if x not in already_enrolled ]
    new_emp = [ x for x in everyone_set if x not in already_enrolled ]
    # take the all_employee list, filter -> anyone who's in 'existing' is removed
    # funcy.where( lambda x: x['email'] == ae[4] , existing )
    #new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
    #new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu')) #ae['email'] ))
    print(new_emp)
    oo.write( "\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ' )
    oo.write(json.dumps(list(new_emp), indent=2))
    # Now, iLearn db (everyone)... find the rows that match the email addresses
    # that we've decided we need to add (new_emp)
    #print(everyone)
    #print( "searching for %s" % j )
    #print( "searched for %s, found: %s" % (j, str(to_add) ))
    #print("\nUploading...\n")
    for j in new_emp:
        #j = new_emp[0]
        print(j)
        to_add = list(funcy.where( everyone, address=j ))
        if to_add:
            employees_refresh_flex(to_add[0])
        else:
            print("Didn't find an entry for that account.")
    print("done uploading")
'''
def teachers_by_term(TERM = "202430"):
    """Print and return every teacher enrollment for a term.

    TERM is the leading SIS term code (e.g. "202430"); courses match on
    sis_source_id LIKE 'TERM%'. Prints a table of (teacher, course name,
    sis id, workflow state) and returns the raw fetchall() rows.
    """
    # The term prefix is bound as a query parameter instead of being
    # interpolated into the SQL text via f-string (avoids SQL injection).
    q = """SELECT c.id, c.name, c.course_code, c.sis_source_id, c.created_at, c.start_at, c.workflow_state, e.last_attended_at,
        u.id, u.sortable_name, u.created_at FROM canvas.courses AS c
        JOIN canvas.enrollments AS e ON e.course_id=c.id
        JOIN canvas.users AS u ON u.id=e.user_id
        WHERE c.sis_source_id LIKE %s AND e.type='TeacherEnrollment' ORDER BY u.sortable_name, c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    all_teachers = cursor.fetchall()
    # Columns 9, 1, 3, 6 = sortable_name, course name, sis id, workflow state.
    table = [[t[9], t[1], t[3], t[6]] for t in all_teachers]
    print(tabulate(table))
    #for t in all_teachers:
    #    print("\t".join( [str(x) for x in [t[9],t[1],t[3],t[6]]]))
    return all_teachers
def courses_in_term(TERM = "202430"):
    """Print every course whose sis_source_id starts with TERM."""
    # Bound parameter instead of f-string interpolation into the SQL text.
    q = """SELECT c.id, c.name, c.course_code, c.sis_source_id, c.workflow_state FROM canvas.courses AS c
        WHERE c.sis_source_id LIKE %s ORDER BY c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    courses = cursor.fetchall()  # renamed from 'all' (shadowed the builtin)
    #table = [ [t[9],t[1],t[3],t[6]] for t in all_teachers]
    print(tabulate(courses))
def pages_in_term(TERM="202430"):
    """Return all wiki pages in courses whose sis_source_id starts with TERM.

    Row layout: c.id, c.course_code, c.sis_source_id, wp.id as wp_id,
    wp.title, wp.url, c.name, wp.body
    """
    # Bound parameter instead of f-string interpolation into the SQL text.
    q = """SELECT c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name , wp.body
        FROM canvas.courses c
        JOIN canvas.wiki_pages wp ON wp.context_id=c.id
        WHERE c.sis_source_id LIKE %s
        ORDER BY c.sis_source_id, wp.title;"""
    (connection, cursor) = db()
    cursor.execute(q, (TERM + '%',))
    pages = cursor.fetchall()  # renamed from 'all' (shadowed the builtin)
    #print(tabulate(pages))
    return pages
# Simple CLI menu: pass a numeric argument to run an option directly,
# or run with no argument to choose interactively.
if __name__ == "__main__":
    print ('')
    # key -> [description, zero-argument function]
    options = {
        1: ['all teachers', teachers_by_term],
        2: ['courses in term', courses_in_term],
        3: ['pages in term', pages_in_term]
    }
    if len(sys.argv) > 1 and re.search(r'^\d+',sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])
    else:
        print ('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])
        print('')
        resp = input('Choose: ')
    # Call the function in the options dict
    options[ int(resp)][1]()

View File

@ -13,6 +13,12 @@ from deepdiff import DeepDiff
from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
from canvas_secrets import instructure_url, instructure_username, instructure_private_key from canvas_secrets import instructure_url, instructure_username, instructure_private_key
import os, asyncio
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator
""" """
@ -432,6 +438,64 @@ def get_enrlmts_for_user(user,enrollments):
################ ################
# Get canvas data 2024 style
def canvas_data_2024_run():
    """Blocking entry point: run the async full-table Canvas DAP sync."""
    print("Updating all tables.")
    sync_job = canvas_data_2024()
    asyncio.run(sync_job)
    print("Done with all tables.")
async def canvas_data_2024():
    """Incrementally synchronize the Canvas DAP tables into local Postgres.

    Credentials come from the environment (DAP_API_URL, DAP_CLIENT_ID,
    DAP_CLIENT_SECRET). Tables that fail to synchronize (e.g. never
    initialized) are reported and skipped rather than aborting the run.
    """
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    # The DSN (with its password) was hard-coded; allow an environment
    # override while defaulting to the historical value for compatibility.
    connection_string: str = os.environ.get(
        "DAP_CONNECTION_STRING",
        "postgresql://postgres:rolley34@192.168.1.6/db")
    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" trying to update {table} ")
                try:
                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")
# Get canvas data 2024 style
def setup_canvas_data_2024_run():
    """Blocking entry point: run the async one-time DAP table initialization."""
    print("Setting up all tables.")
    setup_job = setup_canvas_data_2024()
    asyncio.run(setup_job)
    print("Done with all tables.")
async def setup_canvas_data_2024():
    """One-time initialization of the Canvas DAP tables in local Postgres.

    Credentials come from the environment (DAP_API_URL, DAP_CLIENT_ID,
    DAP_CLIENT_SECRET). Tables that fail to initialize (e.g. already
    created) are reported and skipped rather than aborting the run.
    """
    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    # The DSN (with its password) was hard-coded; allow an environment
    # override while defaulting to the historical value for compatibility.
    connection_string: str = os.environ.get(
        "DAP_CONNECTION_STRING",
        "postgresql://postgres:rolley34@192.168.1.6/db")
    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" {table}")
                try:
                    await SQLReplicator(session, db_connection).initialize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")
# Get something from Canvas Data # Get something from Canvas Data
def do_request(path): def do_request(path):
#Set up the request pieces #Set up the request pieces
@ -2199,9 +2263,11 @@ if __name__ == "__main__":
5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] , 5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
6: ['Canvas data: interactive sync', interactive ], 6: ['Canvas data: interactive sync', interactive ],
7: ['Canvas data: automated sync', sync_non_interactive ], 7: ['Canvas data: automated sync', sync_non_interactive ],
8: ['Scrape schedule from ssb', scrape_schedule_multi ], 8: ['Get canvas data 2024 style', canvas_data_2024_run ],
9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],
16: ['Scrape schedule from ssb', scrape_schedule_multi ],
14: ['Generate latestart schedule', list_latestarts ], 14: ['Generate latestart schedule', list_latestarts ],
9: ['Test ssb calls with python', scrape_schedule_py ], 15: ['Test ssb calls with python', scrape_schedule_py ],
10: ['schedule to db', scrape_for_db ], 10: ['schedule to db', scrape_for_db ],
11: ['clean argos draft schedule file', argos_data_from_cvc], 11: ['clean argos draft schedule file', argos_data_from_cvc],
12: ['make expanded schedule json files of old semesters', expand_old_semesters ], 12: ['make expanded schedule json files of old semesters', expand_old_semesters ],

View File

@ -1,30 +1,41 @@
absl-py==2.1.0
aiofiles==23.2.1 aiofiles==23.2.1
aiohttp==3.9.3 aiohttp==3.9.3
aiohttp-retry==2.8.3 aiohttp-retry==2.8.3
aiomysql==0.2.0 aiomysql==0.2.0
aiosignal==1.3.1 aiosignal==1.3.1
annotated-types==0.6.0
annoy==1.17.3 annoy==1.17.3
arrow==1.3.0 arrow==1.3.0
async-timeout==4.0.3 async-timeout==4.0.3
asyncpg==0.29.0 asyncpg==0.29.0
attrs==23.2.0 attrs==23.2.0
Automat==22.10.0
Babel==2.14.0 Babel==2.14.0
bcrypt==4.1.2 bcrypt==4.1.2
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
bidict==0.22.1 bidict==0.22.1
blinker==1.7.0 blinker==1.7.0
blis==0.7.11
cachetools==5.3.2 cachetools==5.3.2
catalogue==2.0.10
certifi==2024.2.2 certifi==2024.2.2
cffi==1.16.0 cffi==1.16.0
charset-normalizer==3.3.2 charset-normalizer==3.3.2
click==8.1.7 click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6 colorama==0.4.6
confection==0.1.4
constantly==23.10.4
contourpy==1.2.0 contourpy==1.2.0
courlan==1.0.0 courlan==1.0.0
cryptography==42.0.2 cryptography==42.0.2
cssselect==1.2.0
cycler==0.12.1 cycler==0.12.1
cymem==2.0.8
dateparser==1.2.0 dateparser==1.2.0
deepdiff==6.7.1 deepdiff==6.7.1
dm-tree==0.1.8
docxcompose==1.4.0 docxcompose==1.4.0
docxtpl==0.16.7 docxtpl==0.16.7
durable-rules==2.0.28 durable-rules==2.0.28
@ -46,22 +57,30 @@ google-auth-oauthlib==1.2.0
googleapis-common-protos==1.62.0 googleapis-common-protos==1.62.0
greenlet==3.0.3 greenlet==3.0.3
h11==0.14.0 h11==0.14.0
h5py==3.10.0
html2markdown==0.1.7 html2markdown==0.1.7
htmldate==1.7.0 htmldate==1.7.0
httplib2==0.22.0 httplib2==0.22.0
huggingface-hub==0.20.3 huggingface-hub==0.20.3
hyperlink==21.0.0
ics==0.7.2 ics==0.7.2
idna==3.6 idna==3.6
incremental==22.10.0
instructure-dap-client==0.3.18 instructure-dap-client==0.3.18
itemadapter==0.8.0
itemloaders==1.1.0
itsdangerous==2.1.2 itsdangerous==2.1.2
Jinja2==3.1.3 Jinja2==3.1.3
jmespath==1.0.1
joblib==1.3.2 joblib==1.3.2
json_strong_typing==0.3.2 json_strong_typing==0.3.2
jsondiff==2.0.0 jsondiff==2.0.0
jsonschema==4.21.1 jsonschema==4.21.1
jsonschema-specifications==2023.12.1 jsonschema-specifications==2023.12.1
jusText==3.0.0 jusText==3.0.0
keras==3.0.4
kiwisolver==1.4.5 kiwisolver==1.4.5
kneed==0.8.5
langcodes==3.3.0 langcodes==3.3.0
lark==1.1.9 lark==1.1.9
linkify-it-py==2.0.2 linkify-it-py==2.0.2
@ -76,6 +95,8 @@ mdurl==0.1.2
minizinc==0.9.0 minizinc==0.9.0
mpmath==1.3.0 mpmath==1.3.0
multidict==6.0.4 multidict==6.0.4
murmurhash==1.0.10
namex==0.0.7
networkx==3.2.1 networkx==3.2.1
nltk==3.8.1 nltk==3.8.1
numpy==1.26.3 numpy==1.26.3
@ -89,23 +110,33 @@ paho-mqtt==1.6.1
pampy==0.3.0 pampy==0.3.0
pandas==2.2.0 pandas==2.2.0
paramiko==3.4.0 paramiko==3.4.0
parsel==1.8.1
path-dict==4.0.0 path-dict==4.0.0
pathlib==1.0.1 pathlib==1.0.1
patsy==0.5.6
pdfminer==20191125 pdfminer==20191125
pdfminer.six==20231228 pdfminer.six==20231228
piexif==1.1.3 piexif==1.1.3
pillow==10.2.0 pillow==10.2.0
plotly==5.18.0
preshed==3.0.9
Protego==0.3.0
protobuf==4.25.2 protobuf==4.25.2
psycopg2==2.9.9
pyarrow==15.0.0 pyarrow==15.0.0
pyasn1==0.5.1 pyasn1==0.5.1
pyasn1-modules==0.3.0 pyasn1-modules==0.3.0
pycparser==2.21 pycparser==2.21
pycryptodome==3.20.0 pycryptodome==3.20.0
pydantic==2.6.1
pydantic_core==2.16.2
PyDispatcher==2.0.7
pygame==2.5.2 pygame==2.5.2
Pygments==2.17.2 Pygments==2.17.2
PyJWT==2.8.0 PyJWT==2.8.0
PyMySQL==1.1.0 PyMySQL==1.1.0
PyNaCl==1.5.0 PyNaCl==1.5.0
pyOpenSSL==24.0.0
pypandoc==1.12 pypandoc==1.12
pyparsing==3.1.1 pyparsing==3.1.1
PyPDF2==3.0.1 PyPDF2==3.0.1
@ -118,10 +149,12 @@ python-socketio==5.11.0
pytz==2024.1 pytz==2024.1
pywin32==306 pywin32==306
PyYAML==6.0.1 PyYAML==6.0.1
queuelib==1.6.2
redis==5.0.1 redis==5.0.1
referencing==0.33.0 referencing==0.33.0
regex==2023.12.25 regex==2023.12.25
requests==2.31.0 requests==2.31.0
requests-file==2.0.0
requests-oauthlib==1.3.1 requests-oauthlib==1.3.1
rich==13.7.0 rich==13.7.0
rpds-py==0.17.1 rpds-py==0.17.1
@ -130,9 +163,11 @@ safetensors==0.4.2
schedule==1.2.1 schedule==1.2.1
scikit-learn==1.4.0 scikit-learn==1.4.0
scipy==1.12.0 scipy==1.12.0
Scrapy==2.11.0
selenium==4.17.2 selenium==4.17.2
sentence-transformers==2.3.1 sentence-transformers==2.3.1
sentencepiece==0.1.99 sentencepiece==0.1.99
service-identity==24.1.0
simple-websocket==1.0.0 simple-websocket==1.0.0
simpy==4.1.1 simpy==4.1.1
six==1.16.0 six==1.16.0
@ -140,14 +175,23 @@ smart-open==6.4.0
sniffio==1.3.0 sniffio==1.3.0
sortedcontainers==2.4.0 sortedcontainers==2.4.0
soupsieve==2.5 soupsieve==2.5
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.25 SQLAlchemy==2.0.25
srsly==2.4.8
statsmodels==0.14.1
striprtf==0.0.26 striprtf==0.0.26
sympy==1.12 sympy==1.12
tabulate==0.9.0
TatSu==5.11.3 TatSu==5.11.3
tenacity==8.2.3
textdistance==4.6.1 textdistance==4.6.1
textual==0.48.2 textual==0.48.2
thinc==8.2.2
threadpoolctl==3.2.0 threadpoolctl==3.2.0
tld==0.13 tld==0.13
tldextract==5.1.1
tokenizers==0.15.1 tokenizers==0.15.1
tomd==0.1.3 tomd==0.1.3
toolz==0.12.1 toolz==0.12.1
@ -157,6 +201,9 @@ trafilatura==1.7.0
transformers==4.37.2 transformers==4.37.2
trio==0.24.0 trio==0.24.0
trio-websocket==0.11.1 trio-websocket==0.11.1
Twisted==22.10.0
twisted-iocpsupport==1.0.4
typer==0.9.0
types-aiofiles==23.2.0.20240106 types-aiofiles==23.2.0.20240106
types-python-dateutil==2.8.19.20240106 types-python-dateutil==2.8.19.20240106
typing_extensions==4.9.0 typing_extensions==4.9.0
@ -165,9 +212,13 @@ tzlocal==5.2
uc-micro-py==1.0.2 uc-micro-py==1.0.2
uritemplate==4.1.1 uritemplate==4.1.1
urllib3==2.2.0 urllib3==2.2.0
w3lib==2.1.2
wasabi==1.1.2
weasel==0.3.4
Werkzeug==3.0.1 Werkzeug==3.0.1
Whoosh==2.7.4 Whoosh==2.7.4
wsproto==1.2.0 wsproto==1.2.0
xlwt==1.3.0 xlwt==1.3.0
yarl==1.9.4 yarl==1.9.4
yattag==1.15.2 yattag==1.15.2
zope.interface==6.1