wip/cache

This commit is contained in:
Marc Beninca 2025-03-18 16:06:46 +01:00
parent 75d54d3dbf
commit e81cd5b745
Signed by: marc.beninca
GPG key ID: 9C7613450C80C24F

View file

@ -1,20 +1,6 @@
"""YouTube DownLoad."""
# playlists
# …
# entries
# playlists / entries
# title
# id
# playlist
# entries
# playlist / entries
# …
from abc import ABC, abstractmethod
from datetime import datetime
from pathlib import Path
from typing import Any
@ -26,9 +12,25 @@ from rwx.log import stream as log
URL = "https://youtube.com"
class Cache(Object):
    """Local cache for YouTube data."""

    def __init__(self, root: Path) -> None:
        """Keep the cache root.

        :param root: root path of the cache
        :type root: Path
        """
        self.root: Path = root
class Channel(Object):
def __init__(self, d: dict) -> None:
self.identifier = d["channel_id"]
"""YouTube channel."""
def __init__(self, channel_id: str) -> None:
"""Set objects tree.
:param channel_id: channel identifier
:type channel_id: str
"""
d = extract_videos(channel_id)
# channel
self.uid = d["channel_id"]
self.title = d["channel"]
self.followers = int(d["channel_follower_count"])
self.description = d["description"]
@ -36,101 +38,54 @@ class Channel(Object):
# TODO thumbnails
self.uploader_id = d["uploader_id"]
self.uploader = d["uploader"]
self.url = d["channel_url"]
# TODO entries
# videos
self.videos_ids = [video["id"] for video in reversed(d["entries"])]
# TODO filter members-only
# playlists
d = extract_playlists(channel_id)
self.playlists_ids = [playlist["id"] for playlist in reversed(d["entries"])]
def add_video(self, d: dict) -> dict:
    """Add video extra info.

    Count the video, log the progress against the number of known
    video identifiers, wrap the info in a Video and store it.

    :param d: video info
    :type d: dict
    :return: the very same info dict
    :rtype: dict
    """
    # nothing in view initialises the counter before the first call,
    # so start from zero when it is missing instead of raising AttributeError
    self.video_index = getattr(self, "video_index", 0) + 1
    log.info(f"{self.video_index} ∕ {len(self.videos_ids)}")
    self.videos.append(Video(d))
    return d
class Tab(Object, ABC):
    """YouTube Tab."""

    URL_ROOT = "https://youtube.com"

    def __init__(self, object_id: str) -> None:
        """Store the identifier, then compute and store the URL.

        Both values are logged as soon as they are set.

        :param object_id: object identifier
        :type object_id: str
        """
        self.object_id = object_id
        log.info(self.object_id)
        self.url = self.get_url()
        log.info(self.url)

    def extract(self) -> dict[str, Any]:
        """Return the info extracted from the URL, without downloading.

        :rtype: dict
        """
        client = Tab.yt_dl(self.get_options())
        info = client.extract_info(self.url, download=False)
        return info

    def get_options(self) -> dict:
        """Return the options used for extraction.

        :rtype: dict
        """
        return dict(extract_flat=True, skip_download=True)

    @abstractmethod
    def get_url(self) -> str:
        """Return the URL to access for this object.

        :rtype: str
        """

    @staticmethod
    def yt_dl(opt: dict) -> YoutubeDL:
        """Return a YoutubeDL built from the given options.

        ``ignoreerrors`` and ``quiet`` are forced to False,
        overriding whatever the given options hold for them.

        :param opt: base options
        :type opt: dict
        :rtype: YoutubeDL
        """
        options = dict(opt)
        options.update(ignoreerrors=False, quiet=False)
        log.info(options)
        return YoutubeDL(options)
class Playlist(Tab):
    """Tab of a single playlist."""

    def __init__(self, playlist_id: str) -> None:
        """Set the playlist identifier as object identifier.

        :param playlist_id: playlist identifier
        :type playlist_id: str
        """
        super().__init__(playlist_id)

    def get_url(self) -> str:
        """Return the playlist URL.

        :rtype: str
        """
        url = f"{Tab.URL_ROOT}/playlist?list={self.object_id}"
        return url
class Playlists(Tab):
    """Tab of the playlists of a channel."""

    def __init__(self, channel_id: str) -> None:
        """Set the channel identifier as object identifier.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(channel_id)

    def get_url(self) -> str:
        """Return the URL of the channel playlists.

        :rtype: str
        """
        url = f"{Tab.URL_ROOT}/channel/{self.object_id}/playlists"
        return url
class Video(Tab):
def __init__(self, video_id: str) -> None:
super().__init__(video_id)
info = self.extract()
self.title = info["title"]
self.fulltitle = info["fulltitle"]
self.duration = info["duration"]
self.categories = info["categories"]
self.tags = info["tags"]
self.description = info["description"]
def download(self) -> None:
Tab.yt_dl(
def load_videos(self) -> None:
"""Load videos extra info."""
self.videos = []
# a
#for index, video_id in enumerate(self.videos_ids):
# log.info(f"{index} ∕ {len(self.videos_ids)}")
# self.videos.append(Video(video_id))
# b
videos_urls = [url_video(video_id) for video_id in self.videos_ids]
y = ytdl(
{
"format": "bestvideo[ext=mp4]+bestaudio[ext=mp4]",
"outtmpl": "%(id)s.%(ext)s",
"writesubtitles": True,
"writethumbnail": True,
"process_info_hooks": [self.add_video],
"skip_download": True,
},
).download([self.url])
)
y.download(videos_urls)
def get_url(self) -> str:
return f"{Tab.URL_ROOT}/watch?v={self.object_id}"
class Video(Object):
"""YouTube video."""
def __init__(self, d: dict) -> None:
"""Set video info.
:param d: video info
:type d: dict
"""
self.datetime = datetime.now().strftime("%Y%m%d%H%M%S")
self.uid = d["id"]
self.fulltitle = d["fulltitle"]
self.duration = d["duration"]
self.categories = d["categories"]
self.tags = d["tags"]
self.description = d["description"]
# channel
@ -147,29 +102,6 @@ class Video(Tab):
# duration
# thumbnails
# view_count
class Videos(Tab):
    """Tab of the videos of a channel."""

    def __init__(self, channel_id: str) -> None:
        """Extract the tab info and keep its title and video identifiers.

        Identifiers are stored in the reverse order of the extracted entries.

        :param channel_id: channel identifier
        :type channel_id: str
        """
        super().__init__(channel_id)
        extracted = self.extract()
        self.title = extracted["title"]
        identifiers = []
        for entry in reversed(extracted["entries"]):
            identifiers.append(entry["id"])
        self.ids = identifiers
        self.videos = {}

    def get_url(self) -> str:
        """Return the URL of the channel videos.

        :rtype: str
        """
        url = f"{Tab.URL_ROOT}/channel/{self.object_id}/videos"
        return url

    def load(self) -> None:
        """Build a Video for each identifier, logging the running count."""
        for count, video_id in enumerate(self.ids, start=1):
            self.videos[video_id] = Video(video_id)
            log.info(count)

    def next(self) -> str | None:
        """Return the first identifier without an ``<id>.mp4`` path.

        :return: identifier, or None when every path exists
        :rtype: str | None
        """
        missing = (
            video_id
            for video_id in self.ids
            if not Path(f"{video_id}.mp4").exists()
        )
        # inside the method body, "next" is the builtin, not this method
        return next(missing, None)
# ╭──────────╮
@ -196,7 +128,7 @@ def download_video(video_id: str | None) -> None:
"writesubtitles": True,
"writethumbnail": True,
},
).download([f"{URL}/watch?v={video_id}"])
).download([url_video(video_id)])
# ╭─────────╮
@ -209,35 +141,35 @@ def extract(opt: dict[str, Any], url: str) -> dict[str, Any]:
:rtype: dict
"""
return ytdl(
d = ytdl(
{
**opt,
"extract_flat": True,
"skip_download": True,
},
).extract_info(url, download=False)
log.info(d)
return d
def extract_channel(channel_id: str) -> dict:
"""Return extracted channel dict.
def extract_playlist(playlist_id: str) -> dict:
    """Return the dict extracted from a playlist URL.

    :param playlist_id: playlist identifier
    :type playlist_id: str
    :rtype: dict
    """
    url = url_playlist(playlist_id)
    return extract({}, url)
def extract_playlists(channel_id: str) -> dict:
"""Return extracted playlists dict.
:param channel_id: channel identifier
:type channel_id: str
:rtype: dict
"""
d = extract({}, f"{URL}/channel/{channel_id}")
return d
def extract_videos(channel_id: str) -> dict:
"""Return extracted videos dict.
:param channel_id: channel identifier
:type channel_id: str
:rtype: dict
"""
d = extract({}, f"{URL}/channel/{channel_id}/videos")
return d
return extract({}, url_playlists(channel_id))
def extract_video(video_id: str) -> dict:
@ -247,8 +179,17 @@ def extract_video(video_id: str) -> dict:
:type video_id: str
:rtype: dict
"""
d = extract({}, f"{URL}/watch?v={video_id}")
return d
return extract({}, url_video(video_id))
def extract_videos(channel_id: str) -> dict:
    """Return the dict extracted from a channel videos URL.

    :param channel_id: channel identifier
    :type channel_id: str
    :rtype: dict
    """
    url = url_videos(channel_id)
    return extract({}, url)
# ╭────────╮
@ -306,6 +247,16 @@ def url_channel(channel_id: str) -> str:
return f"{URL}/channel/{channel_id}"
def url_playlist(playlist_id: str) -> str:
    """Return the URL of a playlist.

    :param playlist_id: playlist identifier
    :type playlist_id: str
    :rtype: str
    """
    url = f"{URL}/playlist?list={playlist_id}"
    return url
def url_playlists(channel_id: str) -> str:
"""Return playlists URL.
@ -316,6 +267,16 @@ def url_playlists(channel_id: str) -> str:
return f"{url_channel(channel_id)}/playlists"
def url_video(video_id: str) -> str:
    """Return the URL of a video.

    :param video_id: video identifier
    :type video_id: str
    :rtype: str
    """
    url = f"{URL}/watch?v={video_id}"
    return url
def url_videos(channel_id: str) -> str:
"""Return videos URL.