# rwx/rwx/sw/yt_dlp/__init__.py

"""YouTube DownLoad."""

#  playlists
# …
# entries

#  playlists / entries
# title
# id

#  playlist
# entries

#  playlist / entries
# …

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
from rwx import Object
from rwx.log import stream as log
from yt_dlp import YoutubeDL


class Tab(Object, ABC):
    """YouTube Tab.

    Abstract base: a subclass provides :meth:`get_url`, which maps the
    stored identifier to the page URL under :attr:`URL_ROOT`.
    """

    URL_ROOT = "https://youtube.com"

    def __init__(self, object_id: str) -> None:
        """Store the identifier, resolve the URL and log both.

        :param object_id: object identifier
        :type object_id: str
        """
        self.object_id = object_id
        log.info(self.object_id)
        # The identifier must be stored first: get_url() is implemented by
        # subclasses and reads self.object_id.
        self.url = self.get_url()
        log.info(self.url)

    def extract(self) -> dict[str, Any]:
        """Return what YoutubeDL extracts for the URL, without downloading.

        :rtype: dict
        """
        extraction_options = self.get_options()
        extractor = Tab.yt_dl(extraction_options)
        return extractor.extract_info(self.url, download=False)

    def get_options(self) -> dict:
        """Return the YoutubeDL options used by :meth:`extract`.

        :rtype: dict
        """
        return dict(extract_flat=True, skip_download=True)

    @abstractmethod
    def get_url(self) -> str:
        """Return the URL to access for this object.

        :rtype: str
        """

    @staticmethod
    def yt_dl(opt: dict) -> YoutubeDL:
        """Return a YoutubeDL built from the given options.

        ``ignoreerrors`` and ``quiet`` are always set to False, replacing
        any value passed in *opt*. The resulting options are logged.

        :param opt: caller options
        :type opt: dict
        :rtype: YoutubeDL
        """
        # Copy first so the caller's dict is never modified.
        options = {**opt}
        options["ignoreerrors"] = False
        options["quiet"] = False
        log.info(options)
        return YoutubeDL(options)


class Playlist(Tab):
    """YouTube playlist page, addressed by playlist identifier."""

    def __init__(self, playlist_id: str) -> None:
        """Set playlist id.

        :param playlist_id: playlist identifier
        :type playlist_id: str
        """
        super().__init__(object_id=playlist_id)

    def get_url(self) -> str:
        """Return the playlist URL.

        :rtype: str
        """
        query = f"list={self.object_id}"
        return f"{Tab.URL_ROOT}/playlist?{query}"


class Playlists(Tab):
    """Playlists page of a YouTube channel, addressed by channel identifier."""

    def __init__(self, channel_id: str) -> None:
        """Set channel id.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(object_id=channel_id)

    def get_url(self) -> str:
        """Return the URL of the channel's playlists page.

        :rtype: str
        """
        channel_url = f"{Tab.URL_ROOT}/channel/{self.object_id}"
        return f"{channel_url}/playlists"


class Video(Tab):
    """YouTube video page, addressed by video identifier.

    Construction runs an extraction and copies these keys of the result
    onto the instance as attributes of the same name: ``title``,
    ``fulltitle``, ``duration``, ``categories``, ``tags``, ``description``.
    """

    def __init__(self, video_id: str) -> None:
        """Set video id and load the video metadata.

        :param video_id: video identifier
        :type video_id: str
        :raises KeyError: if the extracted dict lacks one of the keys
        """
        super().__init__(object_id=video_id)
        metadata = self.extract()
        for key in (
            "title",
            "fulltitle",
            "duration",
            "categories",
            "tags",
            "description",
        ):
            setattr(self, key, metadata[key])

    def download(self) -> None:
        """Download the video, with subtitles and thumbnail, as ``<id>.<ext>``."""
        # NOTE(review): the audio selector filters on ext=mp4 — confirm that
        # yt-dlp reports audio-only formats with that extension.
        download_options = {
            "format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]",
            "outtmpl": "%(id)s.%(ext)s",
            "writesubtitles": True,
            "writethumbnail": True,
        }
        downloader = Tab.yt_dl(download_options)
        downloader.download([self.url])

    def get_url(self) -> str:
        """Return the watch URL of the video.

        :rtype: str
        """
        query = f"v={self.object_id}"
        return f"{Tab.URL_ROOT}/watch?{query}"


#  channel
# title
# channel_follower_count
# description
# tags
# thumbnails
# uploader_id
# uploader
#  videos / entries
# title
# description truncated
# duration
# thumbnails
# view_count
class Videos(Tab):
    """Videos page of a YouTube channel, addressed by channel identifier.

    Construction runs an extraction and keeps the page title plus the
    video identifiers, in the reverse of the order the entries were
    extracted in (presumably oldest first — confirm against yt-dlp output).
    """

    def __init__(self, channel_id: str) -> None:
        """Set channel id and collect the identifiers of its videos.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(object_id=channel_id)
        listing = self.extract()
        self.title = listing["title"]
        collected = []
        for entry in reversed(listing["entries"]):
            collected.append(entry["id"])
        self.ids = collected
        # Filled by load(): video identifier -> Video.
        self.videos = {}

    def get_url(self) -> str:
        """Return the URL of the channel's videos page.

        :rtype: str
        """
        channel_url = f"{Tab.URL_ROOT}/channel/{self.object_id}"
        return f"{channel_url}/videos"

    def load(self) -> None:
        """Build a Video for every identifier, logging the running count."""
        for count, video_id in enumerate(self.ids, start=1):
            self.videos[video_id] = Video(video_id)
            log.info(count)

    def next(self) -> str | None:
        """Return the first identifier without a ``<id>.mp4`` file, if any.

        The file is looked up relative to the current working directory.

        :rtype: str | None
        """
        missing = (
            video_id
            for video_id in self.ids
            if not Path(f"{video_id}.mp4").exists()
        )
        # Inside the method body `next` is still the builtin; the generator
        # stops probing the filesystem at the first missing file.
        return next(missing, None)


def download(video_id: str | None) -> None:
    """Download a video, with subtitles and thumbnail, as ``<id>.<ext>``.

    Does nothing when *video_id* is None or empty.

    :param video_id: video identifier
    :type video_id: str | None
    """
    if not video_id:
        return
    # NOTE(review): the audio selector filters on ext=mp4 — confirm that
    # yt-dlp reports audio-only formats with that extension.
    download_options = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]",
        "outtmpl": "%(id)s.%(ext)s",
        "writesubtitles": True,
        "writethumbnail": True,
    }
    downloader = Tab.yt_dl(download_options)
    downloader.download([f"{Tab.URL_ROOT}/watch?v={video_id}"])