rwx/rwx/sw/yt_dlp/__init__.py
2025-06-07 17:40:15 +02:00

146 lines
3.3 KiB
Python

"""YouTube DownLoad."""
# playlists
# …
# entries
# playlists / entries
# title
# id
# playlist
# entries
# playlist / entries
# …
from abc import ABC, abstractmethod
from typing import Any
from rwx import Object
from rwx.log import stream as log
from yt_dlp import YoutubeDL
class Tab(Object, ABC):
    """YouTube Tab.

    Abstract base for an object identified by an id and reachable through
    a URL below ``URL_ROOT``. Subclasses provide the URL via ``get_url``.
    """

    URL_ROOT = "https://youtube.com"

    def __init__(self, object_id: str) -> None:
        """Set object id and the URL derived from it.

        :param object_id: object identifier
        :type object_id: str
        """
        self.object_id = object_id
        log.info(self.object_id)
        # The URL is built by the concrete subclass from the id stored above.
        self.url = self.get_url()
        log.info(self.url)

    def extract(self) -> dict[str, Any]:
        """Return extracted dict.

        Information is extracted for ``self.url`` without downloading.

        :rtype: dict
        """
        # Use the downloader as a context manager so that it is closed
        # once extraction is over instead of lingering until collection.
        with Tab.yt_dl(self.get_options()) as yt_dl:
            return yt_dl.extract_info(self.url, download=False)

    def get_options(self) -> dict[str, Any]:
        """Return options for the action.

        :rtype: dict
        """
        return {
            "extract_flat": True,
            "skip_download": True,
        }

    @abstractmethod
    def get_url(self) -> str:
        """Return URL to access for object.

        :rtype: str
        """

    @staticmethod
    def yt_dl(opt: dict[str, Any]) -> YoutubeDL:
        """Return a downloader configured with the given options.

        ``ignoreerrors`` and ``quiet`` are always forced to ``False``,
        overriding any value present in ``opt``.

        :param opt: options for the downloader
        :type opt: dict
        :rtype: YoutubeDL
        """
        # Forced keys come last in the literal, so they win over ``opt``.
        options = {**opt, "ignoreerrors": False, "quiet": False}
        log.info(options)
        return YoutubeDL(options)
class Playlist(Tab):
    """YouTube playlist, addressed by its playlist identifier."""

    def __init__(self, playlist_id: str) -> None:
        """Set playlist id as object id.

        :param playlist_id: playlist identifier
        :type playlist_id: str
        """
        super().__init__(object_id=playlist_id)

    def get_url(self) -> str:
        """Return URL of the playlist page.

        :rtype: str
        """
        url = f"{Tab.URL_ROOT}/playlist?list={self.object_id}"
        return url
class Playlists(Tab):
    """Playlists page of a YouTube channel, addressed by channel identifier."""

    def __init__(self, channel_id: str) -> None:
        """Set channel id as object id.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(object_id=channel_id)

    def get_url(self) -> str:
        """Return URL of the channel playlists page.

        :rtype: str
        """
        url = f"{Tab.URL_ROOT}/channel/{self.object_id}/playlists"
        return url
class Video(Tab):
    """YouTube video.

    Information is extracted as soon as the instance is created, and a
    selection of its fields is exposed as attributes.
    """

    def __init__(self, video_id: str) -> None:
        """Set video id and load the video information.

        :param video_id: video identifier
        :type video_id: str
        :raises KeyError: if an expected field is missing from the
            extracted information
        """
        super().__init__(video_id)
        info = self.extract()
        self.title = info["title"]
        self.fulltitle = info["fulltitle"]
        self.duration = info["duration"]
        self.categories = info["categories"]
        self.tags = info["tags"]
        self.description = info["description"]
        # TODO formats
        # TODO thumbnails
        # TODO thumbnail

    def download(self) -> None:
        """Download the video into a file named after its id.

        Subtitles and thumbnail are requested along with the video.
        """
        options = {
            # yt-dlp reports MP4 audio-only streams with the "m4a"
            # extension, so the original "bestaudio[ext=mp4]" pairing is
            # kept as first choice and an "m4a" pairing is the fallback.
            "format": (
                "bestvideo[ext=mp4]+bestaudio[ext=mp4]"
                "/bestvideo[ext=mp4]+bestaudio[ext=m4a]"
            ),
            "outtmpl": "%(id)s.%(ext)s",
            "writesubtitles": True,
            "writethumbnail": True,
        }
        # Context manager closes the downloader once the download is over.
        with Tab.yt_dl(options) as yt_dl:
            yt_dl.download([self.url])

    def get_url(self) -> str:
        """Return URL of the video watch page.

        :rtype: str
        """
        return f"{Tab.URL_ROOT}/watch?v={self.object_id}"
# channel
# title
# channel_follower_count
# description
# tags
# thumbnails
# uploader_id
# uploader
# videos / entries
# title
# description truncated
# duration
# thumbnails
# view_count
class Videos(Tab):
    """Videos page of a YouTube channel.

    The page is extracted on creation to collect the title and the ids of
    the listed entries; individual videos are only loaded on ``load``.
    """

    def __init__(self, channel_id: str) -> None:
        """Set channel id and collect the ids of the listed videos.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(channel_id)
        info = self.extract()
        self.title = info["title"]
        self.ids = [v["id"] for v in info["entries"]]
        # Filled by load(), keyed by video id.
        self.videos: dict[str, Video] = {}

    def get_url(self) -> str:
        """Return URL of the channel videos page.

        :rtype: str
        """
        return f"{Tab.URL_ROOT}/channel/{self.object_id}/videos"

    def load(self) -> None:
        """Load every listed video, logging the running count."""
        # Each Video extracts its own information when instantiated.
        for done, video_id in enumerate(self.ids, start=1):
            self.videos[video_id] = Video(video_id)
            log.info(done)