diff --git a/rwx/sw/ytdlp/__init__.py b/rwx/sw/ytdlp/__init__.py index 8d6ce3f..c6bae2b 100644 --- a/rwx/sw/ytdlp/__init__.py +++ b/rwx/sw/ytdlp/__init__.py @@ -1,20 +1,6 @@ """YouTube DownLoad.""" -# playlists -# … -# entries - -# playlists / entries -# title -# id - -# playlist -# entries - -# playlist / entries -# … - -from abc import ABC, abstractmethod +from datetime import datetime from pathlib import Path from typing import Any @@ -26,9 +12,25 @@ from rwx.log import stream as log URL = "https://youtube.com" +class Cache(Object): + """YouTube local cache.""" + + def __init__(self, root: Path) -> None: + self.root = root + + class Channel(Object): - def __init__(self, d: dict) -> None: - self.identifier = d["channel_id"] + """YouTube channel.""" + + def __init__(self, channel_id: str) -> None: + """Set objects tree. + + :param channel_id: channel identifier + :type channel_id: str + """ + d = extract_videos(channel_id) + # channel + self.uid = d["channel_id"] self.title = d["channel"] self.followers = int(d["channel_follower_count"]) self.description = d["description"] @@ -36,101 +38,54 @@ class Channel(Object): # TODO thumbnails self.uploader_id = d["uploader_id"] self.uploader = d["uploader"] - self.url = d["channel_url"] - # TODO entries + # videos + self.videos_ids = [video["id"] for video in reversed(d["entries"])] + # TODO filter members-only + # playlists + d = extract_playlists(channel_id) + self.playlists_ids = [playlist["id"] for playlist in reversed(d["entries"])] + def add_video(self, d: dict) -> dict: + """Add video extra info.""" + self.video_index += 1 + log.info(f"{self.video_index} ∕ {len(self.videos_ids)}") + self.videos.append(Video(d)) + return d -class Tab(Object, ABC): - """YouTube Tab.""" - - URL_ROOT = "https://youtube.com" - - def __init__(self, object_id: str) -> None: - """Set object id. 
- - :param object_id: object identifier - :type object_id: str - """ - self.object_id = object_id - log.info(self.object_id) - self.url = self.get_url() - log.info(self.url) - - def extract(self) -> dict[str, Any]: - """Return extracted dict. - - :rtype: dict - """ - yt_dl = Tab.yt_dl(self.get_options()) - return yt_dl.extract_info(self.url, download=False) - - def get_options(self) -> dict: - """Return options for the action. - - :rtype: dict - """ - return { - "extract_flat": True, - "skip_download": True, - } - - @abstractmethod - def get_url(self) -> str: - """Return URL to access for object. - - :rtype: str - """ - - @staticmethod - def yt_dl(opt: dict) -> YoutubeDL: - options = { - **opt, - "ignoreerrors": False, - "quiet": False, - } - log.info(options) - return YoutubeDL(options) - - -class Playlist(Tab): - def __init__(self, playlist_id: str) -> None: - super().__init__(playlist_id) - - def get_url(self) -> str: - return f"{Tab.URL_ROOT}/playlist?list={self.object_id}" - - -class Playlists(Tab): - def __init__(self, channel_id: str) -> None: - super().__init__(channel_id) - - def get_url(self) -> str: - return f"{Tab.URL_ROOT}/channel/{self.object_id}/playlists" - - -class Video(Tab): - def __init__(self, video_id: str) -> None: - super().__init__(video_id) - info = self.extract() - self.title = info["title"] - self.fulltitle = info["fulltitle"] - self.duration = info["duration"] - self.categories = info["categories"] - self.tags = info["tags"] - self.description = info["description"] - - def download(self) -> None: - Tab.yt_dl( + def load_videos(self) -> None: + """Reset progress state and load videos extra info.""" + self.videos, self.video_index = [], 0 + # a + #for index, video_id in enumerate(self.videos_ids): + # log.info(f"{index} ∕ {len(self.videos_ids)}") + # self.videos.append(Video(video_id)) + # b + videos_urls = [url_video(video_id) for video_id in self.videos_ids] + y = ytdl( { - "format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]", - "outtmpl": "%(id)s.%(ext)s", - "writesubtitles": True, - 
"writethumbnail": True, + "process_info_hooks": [self.add_video], + "skip_download": True, }, - ).download([self.url]) + ) + y.download(videos_urls) - def get_url(self) -> str: - return f"{Tab.URL_ROOT}/watch?v={self.object_id}" + +class Video(Object): + """YouTube video.""" + + def __init__(self, d: dict) -> None: + """Set video info. + + :param d: video info + :type d: dict + """ + self.datetime = datetime.now().strftime("%Y%m%d%H%M%S") + self.uid = d["id"] + self.fulltitle = d["fulltitle"] + self.duration = d["duration"] + self.categories = d["categories"] + self.tags = d["tags"] + self.description = d["description"] # channel @@ -147,29 +102,6 @@ class Video(Tab): # duration # thumbnails # view_count -class Videos(Tab): - def __init__(self, channel_id: str) -> None: - super().__init__(channel_id) - info = self.extract() - self.title = info["title"] - self.ids = [v["id"] for v in reversed(info["entries"])] - self.videos = {} - - def get_url(self) -> str: - return f"{Tab.URL_ROOT}/channel/{self.object_id}/videos" - - def load(self) -> None: - done = 0 - for video_id in self.ids: - self.videos[video_id] = Video(video_id) - done += 1 - log.info(done) - - def next(self) -> str | None: - for video_id in self.ids: - if not Path(f"{video_id}.mp4").exists(): - return video_id - return None # ╭──────────╮ @@ -196,7 +128,7 @@ def download_video(video_id: str | None) -> None: "writesubtitles": True, "writethumbnail": True, }, - ).download([f"{URL}/watch?v={video_id}"]) + ).download([url_video(video_id)]) # ╭─────────╮ @@ -209,35 +141,35 @@ def extract(opt: dict[str, Any], url: str) -> dict[str, Any]: :rtype: dict """ - return ytdl( + d = ytdl( { **opt, "extract_flat": True, "skip_download": True, }, ).extract_info(url, download=False) + log.info(d) + return d -def extract_channel(channel_id: str) -> dict: - """Return extracted channel dict. +def extract_playlist(playlist_id: str) -> dict: + """Return extracted playlist dict. 
+ + :param playlist_id: playlist identifier + :type playlist_id: str + :rtype: dict + """ + return extract({}, url_playlist(playlist_id)) + + +def extract_playlists(channel_id: str) -> dict: + """Return extracted playlists dict. :param channel_id: channel identifier :type channel_id: str :rtype: dict """ - d = extract({}, f"{URL}/channel/{channel_id}") - return d - - -def extract_videos(channel_id: str) -> dict: - """Return extracted videos dict. - - :param channel_id: channel identifier - :type channel_id: str - :rtype: dict - """ - d = extract({}, f"{URL}/channel/{channel_id}/videos") - return d + return extract({}, url_playlists(channel_id)) def extract_video(video_id: str) -> dict: @@ -247,8 +179,17 @@ def extract_video(video_id: str) -> dict: :type video_id: str :rtype: dict """ - d = extract({}, f"{URL}/watch?v={video_id}") - return d + return extract({}, url_video(video_id)) + + +def extract_videos(channel_id: str) -> dict: + """Return extracted videos dict. + + :param channel_id: channel identifier + :type channel_id: str + :rtype: dict + """ + return extract({}, url_videos(channel_id)) # ╭────────╮ @@ -306,6 +247,16 @@ def url_channel(channel_id: str) -> str: return f"{URL}/channel/{channel_id}" +def url_playlist(playlist_id: str) -> str: + """Return playlist URL. + + :param playlist_id: playlist identifier + :type playlist_id: str + :rtype: str + """ + return f"{URL}/playlist?list={playlist_id}" + + def url_playlists(channel_id: str) -> str: """Return playlists URL. @@ -316,6 +267,16 @@ def url_playlists(channel_id: str) -> str: return f"{url_channel(channel_id)}/playlists" +def url_video(video_id: str) -> str: + """Return video URL. + + :param video_id: video identifier + :type video_id: str + :rtype: str + """ + return f"{URL}/watch?v={video_id}" + + def url_videos(channel_id: str) -> str: """Return videos URL.