rwx/rwx/sw/yt_dlp/__init__.py
2025-06-07 17:40:16 +02:00

166 lines
3.8 KiB
Python

"""YouTube DownLoad."""
# playlists
# …
# entries
# playlists / entries
# title
# id
# playlist
# entries
# playlist / entries
# …
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
from rwx import Object
from rwx.log import stream as log
from yt_dlp import YoutubeDL
class Tab(Object, ABC):
    """YouTube Tab.

    Base class for objects addressed by a YouTube URL.
    Subclasses implement :meth:`get_url` to build that URL
    from the object identifier.
    """

    URL_ROOT = "https://youtube.com"

    def __init__(self, object_id: str) -> None:
        """Set object id.

        The URL is computed once from the identifier, and both are logged.

        :param object_id: object identifier
        :type object_id: str
        """
        self.object_id = object_id
        log.info(self.object_id)
        self.url = self.get_url()
        log.info(self.url)

    def extract(self) -> dict[str, Any]:
        """Return extracted dict.

        Nothing is downloaded, the information for the URL is only extracted.

        :rtype: dict
        """
        # Context manager: the downloader gets closed once extraction is done,
        # including when extraction raises.
        with Tab.yt_dl(self.get_options()) as yt_dl:
            return yt_dl.extract_info(self.url, download=False)

    def get_options(self) -> dict[str, Any]:
        """Return options for the action.

        :rtype: dict
        """
        return {
            "extract_flat": True,
            "skip_download": True,
        }

    @abstractmethod
    def get_url(self) -> str:
        """Return URL to access for object.

        :rtype: str
        """

    @staticmethod
    def yt_dl(opt: dict[str, Any]) -> YoutubeDL:
        """Return a downloader built from options.

        ``ignoreerrors`` and ``quiet`` are always set to ``False``:
        they come after the caller options, hence override them.
        The resulting options are logged.

        :param opt: caller options
        :type opt: dict
        :rtype: YoutubeDL
        """
        options = {
            **opt,
            "ignoreerrors": False,
            "quiet": False,
        }
        log.info(options)
        return YoutubeDL(options)
class Playlist(Tab):
    """YouTube playlist."""

    def __init__(self, playlist_id: str) -> None:
        """Set playlist id.

        :param playlist_id: playlist identifier
        :type playlist_id: str
        """
        super().__init__(playlist_id)

    def get_url(self) -> str:
        """Return URL to access for playlist.

        :rtype: str
        """
        root = Tab.URL_ROOT
        identifier = self.object_id
        return f"{root}/playlist?list={identifier}"
class Playlists(Tab):
    """Playlists tab of a YouTube channel."""

    def __init__(self, channel_id: str) -> None:
        """Set channel id.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(channel_id)

    def get_url(self) -> str:
        """Return URL to access for channel playlists.

        :rtype: str
        """
        root = Tab.URL_ROOT
        identifier = self.object_id
        return f"{root}/channel/{identifier}/playlists"
class Video(Tab):
    """YouTube video."""

    def __init__(self, video_id: str) -> None:
        """Set video id and extract its information.

        Each attribute below is read from the extracted dict,
        so a missing key raises ``KeyError``.

        :param video_id: video identifier
        :type video_id: str
        """
        super().__init__(video_id)
        info = self.extract()
        self.title = info["title"]
        self.fulltitle = info["fulltitle"]
        self.duration = info["duration"]
        self.categories = info["categories"]
        self.tags = info["tags"]
        self.description = info["description"]

    def download(self) -> None:
        """Download video, along with subtitles and thumbnail.

        Output files are named after the video identifier.
        """
        options = {
            # NOTE(review): YouTube audio-only streams presumably use the
            # "m4a" extension; confirm "bestaudio[ext=mp4]" selects a format.
            "format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]",
            "outtmpl": "%(id)s.%(ext)s",
            "writesubtitles": True,
            "writethumbnail": True,
        }
        # Context manager: the downloader gets closed once the download is
        # done, including when the download raises.
        with Tab.yt_dl(options) as yt_dl:
            yt_dl.download([self.url])

    def get_url(self) -> str:
        """Return URL to access for video.

        :rtype: str
        """
        return f"{Tab.URL_ROOT}/watch?v={self.object_id}"
# channel
# title
# channel_follower_count
# description
# tags
# thumbnails
# uploader_id
# uploader
# videos / entries
# title
# description truncated
# duration
# thumbnails
# view_count
class Videos(Tab):
    """Videos tab of a YouTube channel."""

    def __init__(self, channel_id: str) -> None:
        """Set channel id and extract its video identifiers.

        Identifiers are stored in the reverse order of the extracted entries.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(channel_id)
        extracted = self.extract()
        self.title = extracted["title"]
        self.ids = [entry["id"] for entry in reversed(extracted["entries"])]
        # video identifier → Video, filled by load()
        self.videos = {}

    def get_url(self) -> str:
        """Return URL to access for channel videos.

        :rtype: str
        """
        root = Tab.URL_ROOT
        return f"{root}/channel/{self.object_id}/videos"

    def load(self) -> None:
        """Build a Video for each identifier, logging the running count."""
        for count, video_id in enumerate(self.ids, start=1):
            self.videos[video_id] = Video(video_id)
            log.info(count)

    def next(self) -> str | None:
        """Return first identifier without a matching ``.mp4`` file.

        The file is looked up as ``<id>.mp4``, relative to the current
        working directory.

        :rtype: str | None
        """
        for video_id in self.ids:
            if Path(f"{video_id}.mp4").exists():
                continue
            return video_id
        return None
def download(video_id: str | None) -> None:
if video_id:
Tab.yt_dl(
{
"format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]",
"outtmpl": "%(id)s.%(ext)s",
"writesubtitles": True,
"writethumbnail": True,
}
).download([f"{Tab.URL_ROOT}/watch?v={video_id}"])