req.txt and ezproxy script

parent 36008e461b
commit e7a80d8880

content.py (+32)
@@ -1248,6 +1248,37 @@ def search_embeddings():
        print(f'Top {i+1}: {r}, {search_index[r]}') #{file} - {sentence} - (Score: {score})')


def repair_ezproxy_links():
    from localcache2 import pages_in_term

    # get all pages in term
    all_pages = pages_in_term()

    # c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
    for p in all_pages:
        course = p[1]
        title = p[4]
        url = p[5]
        body = p[7]
        # print(body)
        try:
            #s = re.search('''["']https:\/\/ezproxy\.gavilan\.edu\/login\?url=(.*)["']''',body)
            a = re.search(r'Online Library Services', title)
            if a:
                continue
            s = re.findall(r'\n.*ezproxy.*\n', body)
            if s:
                print(course, title, url)
                print(" ", s, "\n")  # s.group())
        except Exception as e:
            #print(f"Skipped: {title}, {e}")
            pass


if __name__ == "__main__":

    print('')
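As committed, the function above only reports pages whose bodies mention ezproxy; the commented-out pattern suggests the eventual repair is to route library links through https://ezproxy.gavilan.edu/login?url=... . A possible sketch of that rewrite step, under stated assumptions: DB_HOSTS and proxy_wrap are hypothetical names, not part of this commit.

import re

# Hypothetical: hosts whose links should be sent through the proxy.
DB_HOSTS = ['search.ebscohost.com', 'www.jstor.org']

def proxy_wrap(body):
    # Prefix matching hrefs with the ezproxy login URL, leaving links
    # that already point at ezproxy.gavilan.edu untouched.
    hosts = '|'.join(re.escape(h) for h in DB_HOSTS)
    pattern = rf'href="(https?://(?:{hosts})[^"]*)"'
    def fix(m):
        url = m.group(1)
        if 'ezproxy.gavilan.edu' in url:
            return m.group(0)
        return f'href="https://ezproxy.gavilan.edu/login?url={url}"'
    return re.sub(pattern, fix, body)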
@@ -1267,6 +1298,7 @@ if __name__ == "__main__":
        14: ['do a vector search', search_embeddings],
        15: ['test priority', test_priority],
        16: ['test embed', test_embed],
        17: ['repair ezproxy links', repair_ezproxy_links],
        }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
localcache2.py (new file, +204)

@@ -0,0 +1,204 @@
# Local data, saving and manipulating

import os, re, gzip, codecs, funcy, pytz, json, random, functools, requests, sys, csv, time, psycopg2
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime as dt
from datetime import timedelta
from dateutil.parser import parse
from os.path import exists, getmtime
from pipelines import sync_non_interactive, url, header, gp, dean
from tabulate import tabulate

#########
######### LOCAL DB
#########

CON = None
CURSOR = None

def db():
    global CON, CURSOR
    CON = psycopg2.connect(database="db",
                           host="192.168.1.6",
                           user="postgres",
                           password="rolley34",
                           port="5432")

    CURSOR = CON.cursor()
    return CON, CURSOR
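For one-off queries against the same database, psycopg2's context managers can handle transaction commit/rollback and cursor cleanup; a minimal sketch reusing db() above (quick_query is a hypothetical helper, not part of this module):

def quick_query(sql, params=None):
    connection, _ = db()
    # `with connection` commits on success and rolls back on error
    # (the connection itself stays open); the cursor block closes the cursor.
    with connection, connection.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()

# e.g. quick_query("SELECT count(*) FROM canvas.courses")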
'''
# Help the next function to upload new users directly to conf database on gavilan.
def employees_refresh_flex(data):
    try:
        data['a'] = 'set/newuser'
        data['sis_user_id'] = data['sis_user_id'][3:]
        print("\nUploading this: \n")
        print(json.dumps(data, indent=2))
        print("\n")
        a = input("Continue (y) or skip (n) ? ")
        if a == 'y':
            # This is what I was missing..........
            # req.add_header("Content-type", "application/x-www-form-urlencoded")
            r3 = requests.post('https://www.gavilan.edu/staff/flex/2020/api.php', params=data)
            print(r3.text)
            #print(r3.headers)
    except Exception as ex:
        print("Failed on: %s\nErr: %s" % (str(data), str(ex)))


# Everyone in iLearn DB with an xyz@gavilan.edu email address.
def all_gav_employees():
    (connection, cursor) = db()
    connection.row_factory = dict_factory
    q = """SELECT u.canvasid, u.name, u.created, u.sortablename, h.address, h.type, h.workflow_state,
        h.updated_at, p.last_request_at, p.last_login_at, p.current_login_at, p.last_login_ip,
        p.current_login_ip, p.sis_user_id, p.unique_name FROM users AS u
        JOIN comm_channel AS h ON u.id=h.user_id
        JOIN pseudonym AS p ON p.user_id=u.id
        WHERE h.address LIKE '%@gavilan.edu'
        ORDER BY u.sortablename"""
    cursor = connection.cursor()
    cursor.execute(q)
    everyone = cursor.fetchall()
    everyone_set = set()
    for E in everyone:
        try:
            everyone_set.add(E['address'].lower())
        except Exception as e:
            print("Exception: %s\nwith: %s" % (str(e), str(E)))

    oo = open('cache/temp1.txt', 'w')
    oo.write(json.dumps(list(everyone_set), indent=2))
    existing = requests.get('https://gavilan.edu/staff/flex/2020/api.php?a=get/users')
    ex = json.loads(existing.text)
    already_enrolled = set()
    for usr in ex['users']:
        try:
            #already_enrolled.add( (usr['goo'], usr['email'].lower(), usr['name']) )
            already_enrolled.add(usr['email'].lower())
        except Exception as e:
            print("Exception: %s\nWith: %s" % (str(e), str(usr)))

    oo.write("\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ')
    oo.write(json.dumps(list(already_enrolled), indent=2))

    # conf_users wants: goo, email, name, active
    # and emails have random capitalization
    # name is First Last, and sometimes with Middle in there.
    #

    # using sets: to_enroll = [ x for x in students if x not in already_enrolled ]
    new_emp = [x for x in everyone_set if x not in already_enrolled]

    # take the all_employee list, filter -> anyone who's in 'existing' is removed

    # funcy.where( lambda x: x['email'] == ae[4] , existing )

    #new_emp = list(funcy.filter( lambda ae: funcy.where( existing, email=ae['email'] ), all_emp ))
    #new_emp = list(funcy.where( existing, email=b'phowell@gavilan.edu')) #ae['email'] ))
    print(new_emp)
    oo.write("\n"*20 + '------------------------------------------\n'*20 + '------ - - - - - - ')
    oo.write(json.dumps(list(new_emp), indent=2))

    # Now, iLearn db (everyone)... find the rows that match the email addresses
    # that we've decided we need to add (new_emp)

    #print(everyone)
    #print( "searching for %s" % j )
    #print( "searched for %s, found: %s" % (j, str(to_add) ))
    #print("\nUploading...\n")
    for j in new_emp:
        #j = new_emp[0]
        print(j)
        to_add = list(funcy.where(everyone, address=j))
        if to_add:
            employees_refresh_flex(to_add[0])
        else:
            print("Didn't find an entry for that account.")
    print("done uploading")

'''
def teachers_by_term(TERM="202430"):
    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.created_at, c.start_at, c.workflow_state, e.last_attended_at,
        u.id, u.sortable_name, u.created_at FROM canvas.courses AS c
        JOIN canvas.enrollments AS e ON e.course_id=c.id
        JOIN canvas.users AS u ON u.id=e.user_id
        WHERE c.sis_source_id LIKE '{TERM}%' AND e.type='TeacherEnrollment' ORDER BY u.sortable_name, c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_teachers = cursor.fetchall()

    table = [[t[9], t[1], t[3], t[6]] for t in all_teachers]
    print(tabulate(table))

    #for t in all_teachers:
    #    print("\t".join( [str(x) for x in [t[9],t[1],t[3],t[6]]]))
    return all_teachers

def courses_in_term(TERM="202430"):
    q = f"""SELECT c.id, c.name, c.course_code, c.sis_source_id, c.workflow_state FROM canvas.courses AS c
        WHERE c.sis_source_id LIKE '{TERM}%' ORDER BY c.course_code;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_courses = cursor.fetchall()

    #table = [ [t[9],t[1],t[3],t[6]] for t in all_teachers]
    print(tabulate(all_courses))

def pages_in_term(TERM="202430"):
    q = f"""SELECT c.id, c.course_code, c.sis_source_id, wp.id as wp_id, wp.title, wp.url, c.name, wp.body
        FROM canvas.courses c
        JOIN canvas.wiki_pages wp ON wp.context_id=c.id
        WHERE c.sis_source_id LIKE '{TERM}%'
        ORDER BY c.sis_source_id, wp.title;"""
    (connection, cursor) = db()
    cursor.execute(q)
    all_pages = cursor.fetchall()
    #print(tabulate(all_pages))
    return all_pages

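The tuple layout follows the SELECT column order, which is what content.py depends on when it indexes p[1], p[4], p[5], and p[7]; unpacking by position makes that contract explicit. A small usage sketch:

for (course_id, course_code, sis_source_id, wp_id,
     title, url, course_name, body) in pages_in_term("202430"):
    print(course_code, title, url)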
if __name__ == "__main__":

    print('')
    options = {
        1: ['all teachers', teachers_by_term],
        2: ['courses in term', courses_in_term],
        3: ['pages in term', pages_in_term]
        }

    if len(sys.argv) > 1 and re.search(r'^\d+', sys.argv[1]):
        resp = int(sys.argv[1])
        print("\n\nPerforming: %s\n\n" % options[resp][0])

    else:
        print('')
        for key in options:
            print(str(key) + '.\t' + options[key][0])

        print('')
        resp = input('Choose: ')

    # Call the function in the options dict
    options[int(resp)][1]()
pipelines.py (+70)

@@ -13,6 +13,12 @@ from deepdiff import DeepDiff
from canvas_secrets import apiKey, apiSecret, FTP_SITE, FTP_USER, FTP_PW, GOO, GOO_PIN, token, url, domain, account_id, header, g_id, g_secret
from canvas_secrets import instructure_url, instructure_username, instructure_private_key

import os, asyncio
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator



"""
@@ -432,6 +438,64 @@ def get_enrlmts_for_user(user,enrollments):
################


# Get canvas data 2024 style
def canvas_data_2024_run():
    print("Updating all tables.")
    asyncio.run(canvas_data_2024())
    print("Done with all tables.")


async def canvas_data_2024():

    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" trying to update {table} ")
                try:
                    #await SQLReplicator(session, db_connection).initialize("canvas", table)
                    await SQLReplicator(session, db_connection).synchronize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")


# Set up canvas data 2024 style
def setup_canvas_data_2024_run():
    print("Setting up all tables.")
    asyncio.run(setup_canvas_data_2024())
    print("Done with all tables.")


async def setup_canvas_data_2024():

    base_url: str = os.environ["DAP_API_URL"]
    client_id: str = os.environ["DAP_CLIENT_ID"]
    client_secret: str = os.environ["DAP_CLIENT_SECRET"]
    connection_string: str = "postgresql://postgres:rolley34@192.168.1.6/db"

    desired_tables = "users,courses,communication_channels,context_modules,conversation_message_participants,conversation_messages,conversation_participants,conversations,course_sections,enrollment_states,enrollment_dates_overrides,enrollment_terms,enrollments,learning_outcome_groups,learning_outcome_question_results,learning_outcomes,quizzes,scores,submissions,submission_versions,wiki_pages,wikis".split(',')
    credentials = Credentials.create(client_id=client_id, client_secret=client_secret)

    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(base_url, credentials) as session:
            #tables = await session.get_tables("canvas")
            for table in desired_tables:
                print(f" {table}")
                try:
                    await SQLReplicator(session, db_connection).initialize("canvas", table)
                except Exception as e:
                    print(f" - skipping {table} because {e}")


# Get something from Canvas Data
def do_request(path):
    #Set up the request pieces
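When one table's sync fails, the same client calls can be rerun for just that table instead of looping over all of them; a minimal sketch reusing the env vars, connection string, and DAP API from canvas_data_2024 above (sync_one_table is a hypothetical helper, not part of this commit):

import asyncio, os
from dap.api import DAPClient
from dap.dap_types import Credentials
from dap.integration.database import DatabaseConnection
from dap.replicator.sql import SQLReplicator

async def sync_one_table(table):
    # Same credentials and target database as canvas_data_2024().
    credentials = Credentials.create(client_id=os.environ["DAP_CLIENT_ID"],
                                     client_secret=os.environ["DAP_CLIENT_SECRET"])
    connection_string = "postgresql://postgres:rolley34@192.168.1.6/db"
    async with DatabaseConnection(connection_string).open() as db_connection:
        async with DAPClient(os.environ["DAP_API_URL"], credentials) as session:
            await SQLReplicator(session, db_connection).synchronize("canvas", table)

# e.g. asyncio.run(sync_one_table("enrollments"))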
@@ -2199,9 +2263,11 @@ if __name__ == "__main__":
        5: ['Manually convert 3 csv files to joined json enrollment file.', convert_roster_files] ,
        6: ['Canvas data: interactive sync', interactive ],
        7: ['Canvas data: automated sync', sync_non_interactive ],
        8: ['Get canvas data 2024 style', canvas_data_2024_run ],          # added; old 8 ('Scrape schedule from ssb') renumbered to 16
        9: ['Set up canvas data 2024 style', setup_canvas_data_2024_run],  # added; old 9 ('Test ssb calls with python') renumbered to 15
        16: ['Scrape schedule from ssb', scrape_schedule_multi ],
        14: ['Generate latestart schedule', list_latestarts ],
        15: ['Test ssb calls with python', scrape_schedule_py ],
        10: ['schedule to db', scrape_for_db ],
        11: ['clean argos draft schedule file', argos_data_from_cvc],
        12: ['make expanded schedule json files of old semesters', expand_old_semesters ],
req.txt

@@ -1,30 +1,41 @@
absl-py==2.1.0
aiofiles==23.2.1
aiohttp==3.9.3
aiohttp-retry==2.8.3
aiomysql==0.2.0
aiosignal==1.3.1
annotated-types==0.6.0
annoy==1.17.3
arrow==1.3.0
async-timeout==4.0.3
asyncpg==0.29.0
attrs==23.2.0
Automat==22.10.0
Babel==2.14.0
bcrypt==4.1.2
beautifulsoup4==4.12.3
bidict==0.22.1
blinker==1.7.0
blis==0.7.11
cachetools==5.3.2
catalogue==2.0.10
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6
confection==0.1.4
constantly==23.10.4
contourpy==1.2.0
courlan==1.0.0
cryptography==42.0.2
cssselect==1.2.0
cycler==0.12.1
cymem==2.0.8
dateparser==1.2.0
deepdiff==6.7.1
dm-tree==0.1.8
docxcompose==1.4.0
docxtpl==0.16.7
durable-rules==2.0.28

@@ -46,22 +57,30 @@ google-auth-oauthlib==1.2.0
googleapis-common-protos==1.62.0
greenlet==3.0.3
h11==0.14.0
h5py==3.10.0
html2markdown==0.1.7
htmldate==1.7.0
httplib2==0.22.0
huggingface-hub==0.20.3
hyperlink==21.0.0
ics==0.7.2
idna==3.6
incremental==22.10.0
instructure-dap-client==0.3.18
itemadapter==0.8.0
itemloaders==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.3
jmespath==1.0.1
joblib==1.3.2
json_strong_typing==0.3.2
jsondiff==2.0.0
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jusText==3.0.0
keras==3.0.4
kiwisolver==1.4.5
kneed==0.8.5
langcodes==3.3.0
lark==1.1.9
linkify-it-py==2.0.2

@@ -76,6 +95,8 @@ mdurl==0.1.2
minizinc==0.9.0
mpmath==1.3.0
multidict==6.0.4
murmurhash==1.0.10
namex==0.0.7
networkx==3.2.1
nltk==3.8.1
numpy==1.26.3

@@ -89,23 +110,33 @@ paho-mqtt==1.6.1
pampy==0.3.0
pandas==2.2.0
paramiko==3.4.0
parsel==1.8.1
path-dict==4.0.0
pathlib==1.0.1
patsy==0.5.6
pdfminer==20191125
pdfminer.six==20231228
piexif==1.1.3
pillow==10.2.0
plotly==5.18.0
preshed==3.0.9
Protego==0.3.0
protobuf==4.25.2
psycopg2==2.9.9
pyarrow==15.0.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.21
pycryptodome==3.20.0
pydantic==2.6.1
pydantic_core==2.16.2
PyDispatcher==2.0.7
pygame==2.5.2
Pygments==2.17.2
PyJWT==2.8.0
PyMySQL==1.1.0
PyNaCl==1.5.0
pyOpenSSL==24.0.0
pypandoc==1.12
pyparsing==3.1.1
PyPDF2==3.0.1

@@ -118,10 +149,12 @@ python-socketio==5.11.0
pytz==2024.1
pywin32==306
PyYAML==6.0.1
queuelib==1.6.2
redis==5.0.1
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
requests-file==2.0.0
requests-oauthlib==1.3.1
rich==13.7.0
rpds-py==0.17.1

@@ -130,9 +163,11 @@ safetensors==0.4.2
schedule==1.2.1
scikit-learn==1.4.0
scipy==1.12.0
Scrapy==2.11.0
selenium==4.17.2
sentence-transformers==2.3.1
sentencepiece==0.1.99
service-identity==24.1.0
simple-websocket==1.0.0
simpy==4.1.1
six==1.16.0

@@ -140,14 +175,23 @@ smart-open==6.4.0
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.5
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.25
srsly==2.4.8
statsmodels==0.14.1
striprtf==0.0.26
sympy==1.12
tabulate==0.9.0
TatSu==5.11.3
tenacity==8.2.3
textdistance==4.6.1
textual==0.48.2
thinc==8.2.2
threadpoolctl==3.2.0
tld==0.13
tldextract==5.1.1
tokenizers==0.15.1
tomd==0.1.3
toolz==0.12.1

@@ -157,6 +201,9 @@ trafilatura==1.7.0
transformers==4.37.2
trio==0.24.0
trio-websocket==0.11.1
Twisted==22.10.0
twisted-iocpsupport==1.0.4
typer==0.9.0
types-aiofiles==23.2.0.20240106
types-python-dateutil==2.8.19.20240106
typing_extensions==4.9.0

@@ -165,9 +212,13 @@ tzlocal==5.2
uc-micro-py==1.0.2
uritemplate==4.1.1
urllib3==2.2.0
w3lib==2.1.2
wasabi==1.1.2
weasel==0.3.4
Werkzeug==3.0.1
Whoosh==2.7.4
wsproto==1.2.0
xlwt==1.3.0
yarl==1.9.4
yattag==1.15.2
zope.interface==6.1