From 667b06fe4a3fdc22736f20ddaecf33b5eaf02db9 Mon Sep 17 00:00:00 2001
From: eulaly
Date: Tue, 31 Mar 2026 13:45:17 -0400
Subject: [PATCH] enforce single-job blocking

---
 pm/task-sample.org |  22 +++++
 pm/tasks.org       | 123 ++++++++++++++++++++++++++
 youdis.py          | 209 ++++++++++++++++++++++++++++-----------------
 3 files changed, 278 insertions(+), 76 deletions(-)
 create mode 100644 pm/task-sample.org
 create mode 100644 pm/tasks.org

diff --git a/pm/task-sample.org b/pm/task-sample.org
new file mode 100644
index 0000000..8dd0253
--- /dev/null
+++ b/pm/task-sample.org
@@ -0,0 +1,22 @@
+#+title: Task Log
+#+updated: [2026-03-18 Wed 14:19]
+
+Use the template below, which should be a top-level org-mode header.
+
+* [ ] M.m.m: Task Title (estimate # commits)
+replace the old observed/canonical workflow with a review-first pipeline that groups normalized rows only during review/combine and links them to catalog items
+
+** Acceptance Criteria
+1. Criterion
+   - expanded data
+2. Criterion
+
+- pm note: amplifying information
+
+** evidence
+- commit: abc123, bcd234
+- tests:
+- datetime: [2026-03-18 Wed 14:15]
+
+** notes
+- explanation of work done, decisions made, reasoning
diff --git a/pm/tasks.org b/pm/tasks.org
new file mode 100644
index 0000000..91cda0f
--- /dev/null
+++ b/pm/tasks.org
@@ -0,0 +1,123 @@
+#+title: Youdis Task Log
+#+updated: [2026-03-31 Tue 08:00]
+
+* [X] 1.1.1: stabilize youdis core bot behavior (estimate 3 commits)
+refactor the current `youdis.py` flow so authorization, download execution, and user feedback are correct and predictable without changing the product shape. keep this narrowly scoped to correctness and maintainability; do not redesign into a queueing platform yet. preserve archive-first behavior and dm status updates; do not add new infrastructure dependencies and prefer boring explicit state over clever concurrency.
+
+** acceptance criteria
+1. 
initialize and load `/config/users.json` safely in all cases
+   - create parent dirs before touch/open
+   - ensure `authorized_users` always has a valid default
+   - normalize stored ids to a single type
+2. fix command-path correctness for `/youtube`, `/adduser`, and `/removeuser`
+   - authorized users can successfully invoke downloads
+   - add/remove user commands persist changes correctly
+   - remove broken/incomplete code paths
+3. duplicate prevention relies on `archive.txt`
+
+** pm notes
+
+** evidence
+- commit: 033d9dd
+- tests: ~python3 -m py_compile ./youdis.py~
+- datetime: [2026-03-31 Tue 13:28]
+
+** notes
+- store Discord user ids as strings in `users.json`
+- duplicate prevention should continue to rely on `archive.txt`, not inferred hook errors
+
+* [ ] 1.1.2: remove global mutable download state and define single-job semantics (estimate 2 commits)
+eliminate shared mutable hook state and make concurrent behavior explicit, even if the initial policy is just "one active job at a time." don't build a scheduler; ok if the simplest outcome is a single active job with a clear busy message. cancellation can be coarse if yt-dlp/process boundaries make graceful stop annoying.
+
+** acceptance criteria
+1. improve runtime handling for downloads
+   - replace brittle thread/join pattern with a simpler async-safe execution path
+   - catch and report real yt-dlp failures
+   - avoid misleading "already exists" error assumptions
+2. progress reporting is isolated per request
+   - no module-level mutable title state shared across jobs
+   - hooks derive state from request-local context
+3. active-job behavior is explicit
+   - either reject a second request while busy or implement a minimal tracked active job
+   - user-facing response explains current behavior
+4. 
`/interrupt` is either implemented minimally or downgraded honestly
+   - no fake command implying cancellation works when it does not
+   - command behavior matches implementation
+
+** evidence
+- commit:
+- tests:
+- datetime:
+
+** notes
+- verify slash-command response patterns against the `interactions` library while touching runtime flow
+
+* [ ] 1.1.3: move static yt-dlp behavior into config and shrink python surface area (estimate 2 commits)
+shift stable downloader options into `default-yt-dlp.conf` so the bot code only handles dynamic inputs and orchestration. optimize for inspectability and low-friction manual ops. keep output naming durable enough for plex/plain-file use. avoid duplicating config values across code and conf.
+
+** acceptance criteria
+1. separate static vs dynamic yt-dlp options cleanly
+   - stable defaults live in `default-yt-dlp.conf`
+   - python injects only request-specific/runtime values
+2. preserve archive and output behavior
+   - `archive.txt` remains the duplicate-prevention mechanism
+   - output paths remain stable and browseable
+3. document config ownership
+   - clarify which settings belong in config vs code
+   - make future yt-dlp tuning possible without major python edits
+
+** evidence
+- commit:
+- tests:
+- datetime:
+
+** notes
+
+* [ ] 1.1.4: simplify image/build/update workflow around manual ops (estimate 3 commits)
+reduce repo cruft from the gitea-runner/nightly-update experiment and replace it with explicit manual update/rebuild mechanics.
+
+** acceptance criteria
+1. define a manual update path for yt-dlp and app image lifecycle
+   - document or script manual `git pull`, rebuild, and redeploy
+   - remove or quarantine brittle auto-update assumptions
+2. review and simplify `update-ytdlp.sh`, workflow yaml, and weekly restart artifacts
+   - keep only artifacts that serve the current manual-ops model
+   - delete or mark deprecated anything tied to abandoned automation paths
+3. 
retain unraid deployment viability
+   - container can still be rebuilt and redeployed cleanly on jeeves
+   - resulting flow is understandable without rereading old ci experiments
+
+- pm note: weekly restart is presumed suspect until proven necessary
+
+** evidence
+- commit:
+- tests:
+- datetime:
+
+** notes
+- do not let runner/workflow complexity dominate a small bot
+- prefer explicit version pinning or manual binary refresh over magical nightlies
+
+* [ ] 1.1.5: clean up packaging/deployment artifacts for unraid consumption (estimate 2 commits)
+make the dockerfile, run script, and unraid-ca template consistent with the refactored app so deployment is less of a ritual ordeal.
+
+** acceptance criteria
+1. align docker/runtime assumptions
+   - paths like `/config` and `/downloads` are consistent across code, scripts, and container metadata
+   - env vars are documented and validated
+2. review deployment artifacts for drift
+   - `dockerfile`, `run-youdis.sh`, and `unraid-ca-template.xml` reflect current behavior
+   - remove stale references and dead assumptions
+3. 
make fresh deployment understandable + - a new deploy on unraid is possible without reconstructing tribal knowledge from old files + +- pm note: this is packaging polish after core correctness, not before + +** evidence +- commit: +- tests: +- datetime: + +** notes +- keep container surface area small +- optimize for “future me can redeploy this without cursing past me too hard” diff --git a/youdis.py b/youdis.py index 7e34204..d530bdd 100644 --- a/youdis.py +++ b/youdis.py @@ -48,32 +48,69 @@ def load_authorized_users(): return authorized_users authorized_users = load_authorized_users() - -title = '' - -async def send_message(ctx, message): - await ctx.author.send(message) - -def download_video(url, options): - with yt_dlp.YoutubeDL(options) as ydl: - ydl.download(url) - -def create_hook(ctx,loop): - def hook(d): - global title - status = d.get('status') - if status == 'error': - msg = f'error; video probably already exists, have you checked archive.txt' - asyncio.run_coroutine_threadsafe(send_message(ctx,msg),loop) - elif d.get('info_dict').get('title') != title: - title = d.get('info_dict').get('title') - playlist_index = d.get('info_dict').get('playlist_index') - playlist_count = d.get('info_dict').get('playlist_count') - filename = d.get('filename') - url = d.get('info_dict').get('webpage_url') - msg = f'{status} {playlist_index} of {playlist_count}: {filename} <{url}>' - asyncio.run_coroutine_threadsafe(send_message(ctx,msg),loop) - return hook + +active_job_lock = threading.Lock() +active_job = None + +async def send_message(ctx, message): + await ctx.author.send(message) + +def claim_active_job(job): + global active_job + with active_job_lock: + if active_job is not None: + return active_job + active_job = job + return None + +def get_active_job(): + with active_job_lock: + return active_job + +def clear_active_job(job): + global active_job + with active_job_lock: + if active_job is job: + active_job = None + +def download_video(url, options): + with 
yt_dlp.YoutubeDL(options) as ydl: + ydl.download(url) + +def create_hook(ctx, loop, cancel_event): + seen_updates = set() + + def hook(d): + if cancel_event.is_set(): + raise yt_dlp.utils.DownloadCancelled('download canceled by /interrupt') + + status = d.get('status') + info = d.get('info_dict') or {} + + if status not in {'downloading', 'finished'}: + return + + filename = d.get('filename') or info.get('_filename') or info.get('title') + update_key = (status, filename) + if update_key in seen_updates: + return + + seen_updates.add(update_key) + playlist_index = info.get('playlist_index') + playlist_count = info.get('playlist_count') + url = info.get('webpage_url') + + prefix = status + if playlist_index and playlist_count: + prefix = f'{status} {playlist_index} of {playlist_count}' + + msg = f'{prefix}: {filename}' + if url: + msg = f'{msg} <{url}>' + + asyncio.run_coroutine_threadsafe(send_message(ctx, msg), loop) + + return hook @interactions.slash_command(name="youtube",description="download video from youtube to server") @interactions.slash_option( @@ -84,52 +121,77 @@ def create_hook(ctx,loop): ) async def youtube(ctx: interactions.SlashContext, url:str): print(f'{ctx.author.id} requested {url}') - loop = asyncio.get_running_loop() - hook = create_hook(ctx,loop) - # use api_to_cli and paste cli options to get the output you need - yoptions = { - 'format':'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', - 'fragment_tries': 10, - 'restrictfilenames':True, - 'paths': {'home':'/downloads'}, - 'retries':10, - 'writeinfojson':False, - 'allow_playlist_files':True, - 'noplaylist':True, - 'download_archive':'/config/archive.txt', - 'progress_hooks':[hook], - 'outtmpl': '%(uploader)s/%(playlist_title)s/%(playlist_index)s%(playlist_index& - )s%(title)s.%(ext)s', - 'outtmpl_na_placeholder':'', - } # check that user is authorized if str(ctx.author.id) not in authorized_users: if ctx.author.id == 127831327012683776: await ctx.author.send('potato stop') await 
ctx.author.send('you are not authorized to use this command. message my owner to be added.') - return - else: - await ctx.channel.send(f'Downloading from <{url}>. Status updates via DM.') - #await ctx.defer() #if you need up to 15m to respond - - # 1/2 - download in separate thread, else progress_hook blocks downstream async ctx.send - download_thread = threading.Thread(target=download_video, args=(url,yoptions)) - download_thread.start() - await asyncio.to_thread(download_thread.join) - - # 2/2 - replace the above with this next try: - #try: - # await asyncio.to_thread(download_video, url, yoptions) - #except Exception as e: - # print(f"download failed: {e}") - # await ctx.author.send(f"download failed: {str(e)}") - - -@interactions.slash_command(name="interrupt",description="cancel current job") -@interactions.check(interactions.is_owner()) -async def _interrupt(ctx): - # interrupt here - print('interrupting current job - not implemented') - await ctx.author.send('interrupting current job - not implemented') + return + + loop = asyncio.get_running_loop() + cancel_event = threading.Event() + hook = create_hook(ctx, loop, cancel_event) + job = { + 'requester_id': str(ctx.author.id), + 'request_url': url, + 'cancel_event': cancel_event, + } + existing_job = claim_active_job(job) + if existing_job: + await ctx.author.send( + f'already downloading for <@{existing_job["requester_id"]}>. ' + 'single-job mode is enabled right now; try again after it finishes.' 
+ ) + return + + # use api_to_cli and paste cli options to get the output you need + yoptions = { + 'format':'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', + 'fragment_tries': 10, + 'restrictfilenames':True, + 'paths': {'home':'/downloads'}, + 'retries':10, + 'writeinfojson':False, + 'allow_playlist_files':True, + 'noplaylist':True, + 'download_archive':'/config/archive.txt', + 'progress_hooks':[hook], + 'outtmpl': '%(uploader)s/%(playlist_title)s/%(playlist_index)s%(playlist_index& - )s%(title)s.%(ext)s', + 'outtmpl_na_placeholder':'', + } + await ctx.channel.send(f'Downloading from <{url}>. Status updates via DM. Single-job mode is enabled.') + + try: + await asyncio.to_thread(download_video, url, yoptions) + except yt_dlp.utils.DownloadCancelled as exc: + print(f'download canceled: {exc}') + await ctx.author.send(f'download canceled: {exc}') + except yt_dlp.utils.DownloadError as exc: + print(f'download failed: {exc}') + await ctx.author.send(f'download failed: {exc}') + except Exception as exc: + print(f'unexpected download failure: {exc}') + await ctx.author.send(f'unexpected download failure: {exc}') + else: + await ctx.author.send(f'download complete for <{url}>') + finally: + clear_active_job(job) + + +@interactions.slash_command(name="interrupt",description="cancel current job") +@interactions.check(interactions.is_owner()) +async def _interrupt(ctx): + job = get_active_job() + if not job: + await ctx.author.send('no active download to interrupt') + return + + job['cancel_event'].set() + print(f'interrupt requested for {job["request_url"]}') + await ctx.author.send( + f'interrupt requested for <{job["request_url"]}>; ' + 'cancellation is coarse and will stop on the next yt-dlp progress update' + ) @interactions.slash_command(name="adduser",description="authorize target user") @interactions.slash_option( @@ -166,13 +228,8 @@ async def _removeuser(ctx: interactions.SlashContext, user:interactions.OptionTy await ctx.author.send(f'deauthorized 
{user.mention}') else: await ctx.author.send(f'{user.mention} is not currently authorized') - -async def dl_hook(d): - msg = f'{d["status"]} {d["filename"]}' - print(msg) - await ctx.author.send(msg) - -api_token = getenv('api_token') -if not api_token: - raise ValueError('API token not set. Retrieve from your Discord bot.') + +api_token = getenv('api_token') +if not api_token: + raise ValueError('API token not set. Retrieve from your Discord bot.') bot.start(api_token)