safe folders

This commit is contained in:
Peter Howell 2025-08-29 01:11:06 +00:00
parent f4c82c237c
commit 1ddf9be13a
1 changed files with 49 additions and 3 deletions

52
util.py Normal file → Executable file
View File

@ -1,16 +1,58 @@
import os
import builtins as _builtins
import codecs as _codecs
# --- Safe file I/O monkey patches ---
# Ensure parent folders exist for write/append modes when using open/codecs.open.
# Keep references to the unpatched callables first: the wrapper functions
# defined further down delegate to these after the directory check, and they
# are the only handle left on the real implementations once the module
# attributes have been rebound.
_orig_open = _builtins.open
_orig_codecs_open = _codecs.open
def _ensure_parent_dir(path):
    """Create the directory part of *path* if it is missing (best effort).

    Only ``str``, ``bytes`` and ``os.PathLike`` values are inspected; any
    other value is ignored.  A path without a directory component, or whose
    directory already exists, is left alone.  Every ``Exception`` raised
    while inspecting or creating the directory is swallowed, so the caller's
    own open call is the one that reports a real problem.

    Always returns ``None``.
    """
    try:
        if not isinstance(path, (str, bytes, os.PathLike)):
            return
        parent = os.path.dirname(os.fspath(path))
        needs_creating = bool(parent) and not os.path.exists(parent)
        if needs_creating:
            os.makedirs(parent, exist_ok=True)
    except Exception:
        # Never block the open call due to directory check errors.
        return
def _open_with_dirs(file, mode='r', *args, **kwargs):
    """Replacement for ``builtins.open`` that pre-creates the parent folder.

    When *file* is a path (``str``, ``bytes`` or ``os.PathLike``) and *mode*
    is a string containing any of ``w``, ``a``, ``x`` or ``+``, the parent
    directory is created through ``_ensure_parent_dir`` before delegating.
    File descriptors and plain read modes are passed straight through.

    Args:
        file: Path or file descriptor, forwarded to the original ``open``.
        mode: Mode string, forwarded to the original ``open``.
        *args: Remaining positional arguments, forwarded unchanged.
        **kwargs: Keyword arguments, forwarded unchanged.

    Returns:
        Whatever the original ``open`` returns.

    Raises:
        Whatever the original ``open`` raises for the given arguments.
    """
    # The previous implementation wrapped this check in ``try/finally`` with
    # a ``return`` inside ``finally``.  That silently discards any in-flight
    # exception (KeyboardInterrupt and SystemExit included) and is flagged as
    # a SyntaxWarning by newer interpreters.  A non-string mode is simply
    # skipped here and left for the real open() to reject.
    is_path = isinstance(file, (str, bytes, os.PathLike))
    if is_path and isinstance(mode, str) and any(flag in mode for flag in ('w', 'a', 'x', '+')):
        # NOTE(review): '+' also matches 'r+', which cannot create the file;
        # the directory is still created in that case, as before.
        _ensure_parent_dir(file)
    return _orig_open(file, mode, *args, **kwargs)
def _codecs_open_with_dirs(filename, mode='r', encoding=None, errors='strict', buffering=-1):
    """Replacement for ``codecs.open`` that pre-creates the parent folder.

    When *filename* is a path (``str``, ``bytes`` or ``os.PathLike``) and
    *mode* is a string containing any of ``w``, ``a``, ``x`` or ``+``, the
    parent directory is created through ``_ensure_parent_dir`` before
    delegating to the original ``codecs.open``.

    Args:
        filename: Path forwarded to the original ``codecs.open``.
        mode: Mode string forwarded unchanged.
        encoding: Encoding name forwarded unchanged.
        errors: Error-handling scheme forwarded unchanged.
        buffering: Buffering policy forwarded unchanged.  Defaults to ``-1``
            to mirror ``codecs.open`` in Python 3; the earlier default of
            ``1`` asked for line buffering, which the binary-mode file that
            ``codecs.open`` opens for an explicit encoding does not support.

    Returns:
        Whatever the original ``codecs.open`` returns.

    Raises:
        Whatever the original ``codecs.open`` raises for the given arguments.
    """
    # No ``try/finally: return`` here: returning from ``finally`` would
    # discard any in-flight exception (KeyboardInterrupt included).  A
    # non-string mode is skipped and left for codecs.open itself to reject.
    is_path = isinstance(filename, (str, bytes, os.PathLike))
    if is_path and isinstance(mode, str) and any(flag in mode for flag in ('w', 'a', 'x', '+')):
        _ensure_parent_dir(filename)
    return _orig_codecs_open(filename, mode, encoding, errors, buffering)
# Apply patches once
# These assignments run when this module is imported.  They rebind attributes
# on the shared ``builtins`` and ``codecs`` modules, so every later lookup of
# ``open`` / ``codecs.open`` anywhere in the process gets the wrappers.
# NOTE(review): nothing here guards against the module body being executed a
# second time (e.g. via importlib.reload) -- confirm that is acceptable.
_builtins.open = _open_with_dirs
_codecs.open = _codecs_open_with_dirs
# Patch pandas to_csv to auto-create parent folder if available
try:
    import pandas as _pd  # noqa: F401
    from functools import wraps as _wraps

    _orig_to_csv = _pd.DataFrame.to_csv

    # ``wraps`` copies the name, docstring and ``__wrapped__`` link from the
    # original method, so help()/introspection on ``DataFrame.to_csv`` keeps
    # working after the replacement.
    @_wraps(_orig_to_csv)
    def _to_csv_with_dirs(self, path_or_buf=None, *args, **kwargs):
        # Only real paths get a directory check; buffers, file objects and
        # ``None`` (return the CSV as a string) are forwarded untouched.
        if isinstance(path_or_buf, (str, bytes, os.PathLike)):
            _ensure_parent_dir(path_or_buf)
        return _orig_to_csv(self, path_or_buf, *args, **kwargs)

    _pd.DataFrame.to_csv = _to_csv_with_dirs
except Exception:
    # pandas is optional: if it is missing or fails to import, the module
    # must still load, so the patch is skipped silently on purpose.
    pass
import re, csv
from collections import defaultdict
from bs4 import BeautifulSoup as bs
import pytz, datetime, dateutil, json
from datetime import timedelta
from dateutil import tz
import functools
from functools import reduce
# Teacher name format changed. Remove commas and switch first to last
@ -84,6 +126,8 @@ def extract_key_values(lst, x):
return reduce(lambda acc, item: acc + [item[x]] if isinstance(item, dict) and x in item else acc, lst, [])
def stripper(s):
from bs4 import BeautifulSoup as bs
REMOVE_ATTRIBUTES = [
'lang','language','onmouseover','onmouseout','script','style','font',
'dir','face','size','color','style','class','width','height','hspace',
@ -281,6 +325,8 @@ def clean_fn(s):
return s
def format_html(html):
    """Parse *html* with the ``html.parser`` backend and return it prettified.

    The return value is whatever ``BeautifulSoup.prettify()`` produces for
    the parsed document.
    """
    # bs4 is imported at call time rather than at module import.
    from bs4 import BeautifulSoup

    return BeautifulSoup(html, 'html.parser').prettify()