import os
import builtins as _builtins
import codecs as _codecs

# --- Safe file I/O monkey patches ---
# Importing this module replaces builtins.open, codecs.open and (when pandas
# can be imported) pandas.DataFrame.to_csv with thin wrappers that create the
# missing parent folder of the target path before delegating to the original
# callable.
#
# NOTE: the change set that introduced these patches ("safe folders") also
# drops the module-level `from bs4 import BeautifulSoup as bs` and
# `import functools` lines of util.py and imports BeautifulSoup inside
# `stripper` and `format_html` instead. The other util.py imports (re, csv,
# defaultdict, pytz, datetime, dateutil, json, timedelta, tz, reduce) are
# untouched by it.

# Name of the attribute stored on every wrapper; it holds the wrapped callable
# so that a re-import/reload can find the real original again.
_ORIGINAL_ATTR = '_safe_io_original'

# Mode characters that can create a file. '+' is deliberately not listed:
# 'r+' requires an existing file, so creating folders for it would only leave
# empty directories behind when the open fails.
_CREATING_MODE_CHARS = frozenset('wax')


def _unwrapped(func):
    """Return the callable hidden behind one of our wrappers, else *func*."""
    return getattr(func, _ORIGINAL_ATTR, func)


# Resolve through any wrapper installed by an earlier import of this module.
# Capturing the wrapper itself would make it call itself forever after
# importlib.reload().
_orig_open = _unwrapped(_builtins.open)
_orig_codecs_open = _unwrapped(_codecs.open)


def _ensure_parent_dir(path):
    """Create the parent directory of *path* if needed (best effort).

    Values that are not str, bytes or os.PathLike (file descriptors, buffers,
    None) are ignored. Failures are swallowed on purpose: the open call that
    follows reports the real problem to the caller.
    """
    if not isinstance(path, (str, bytes, os.PathLike)):
        return
    try:
        parent = os.path.dirname(os.fspath(path))
        if parent:
            # exist_ok makes a separate os.path.exists() check unnecessary.
            os.makedirs(parent, exist_ok=True)
    except (OSError, TypeError, ValueError):
        # Never block the open call due to directory creation errors.
        pass


def _creates_file(mode):
    """Return True when *mode* is a string containing 'w', 'a' or 'x'."""
    return isinstance(mode, str) and not _CREATING_MODE_CHARS.isdisjoint(mode)


def _open_with_dirs(file, mode='r', *args, **kwargs):
    """Replacement for builtins.open that creates missing parent folders.

    All arguments are forwarded unchanged to the original open; its return
    value and exceptions are passed through.
    """
    if _creates_file(mode):
        _ensure_parent_dir(file)
    return _orig_open(file, mode, *args, **kwargs)


def _codecs_open_with_dirs(filename, mode='r', *args, **kwargs):
    """Replacement for codecs.open that creates missing parent folders.

    encoding, errors and buffering are forwarded untouched (positionally or
    by keyword), so the defaults of the real codecs.open stay in effect.
    """
    if _creates_file(mode):
        _ensure_parent_dir(filename)
    return _orig_codecs_open(filename, mode, *args, **kwargs)


# Apply patches; the marker attribute keeps repeated imports idempotent.
setattr(_open_with_dirs, _ORIGINAL_ATTR, _orig_open)
setattr(_codecs_open_with_dirs, _ORIGINAL_ATTR, _orig_codecs_open)
_builtins.open = _open_with_dirs
_codecs.open = _codecs_open_with_dirs

# Patch pandas to_csv to auto-create the parent folder, if pandas is available.
try:
    import pandas as _pd
except Exception:
    # pandas is optional here; whatever its import raises, to_csv simply
    # stays unpatched and this module keeps loading.
    _pd = None

if _pd is not None:
    _orig_to_csv = _unwrapped(_pd.DataFrame.to_csv)

    def _to_csv_with_dirs(self, path_or_buf=None, *args, **kwargs):
        """DataFrame.to_csv wrapper that creates the target's parent folder."""
        # _ensure_parent_dir ignores None and buffer objects by itself.
        _ensure_parent_dir(path_or_buf)
        return _orig_to_csv(self, path_or_buf, *args, **kwargs)

    setattr(_to_csv_with_dirs, _ORIGINAL_ATTR, _orig_to_csv)
    _pd.DataFrame.to_csv = _to_csv_with_dirs