initial commit

This commit is contained in:
2025-07-20 00:51:55 +03:00
commit 42684e0cb6
29 changed files with 2615 additions and 0 deletions
+74
View File
@@ -0,0 +1,74 @@
from dataclasses import dataclass
from typing import Sized
@dataclass(frozen=True)
class Content:
tmdb_id: int = None
vote_average: float = None
adult: bool = None
title: str = None
og_title: str = None
homepage: str = None
poster_url: str = None
backdrop_url: str = None
release_date: str = None
genres: tuple[Sized, ...] = None
@dataclass(frozen=True)
class Movie(Content):
    """A movie: the shared ``Content`` metadata plus file and film details.

    ``type`` is the discriminator string ``"movie"``; the remaining fields
    default to ``None`` and are annotated as optional accordingly.
    """

    type: str = "movie"
    file_size: int | None = None
    budget: int | None = None
    runtime: int | None = None
    file_url: str | None = None
@dataclass(frozen=True)
class Episode:
type: str = "episode"
episode_number: int = None
season_number: int = None
runtime: int = None
vote_average: float = None
title: str = None
file_url: str = None
episode_type: str = None
release_date: str = None
@dataclass(frozen=True)
class Season:
    """One season of a TV show together with its episodes.

    ``type`` is the discriminator string ``"season"``; every other field
    defaults to ``None`` and is annotated as optional accordingly.
    """

    type: str = "season"
    season_number: int | None = None
    episodes_count: int | None = None
    vote_average: float | None = None
    title: str | None = None
    poster_url: str | None = None
    release_date: str | None = None
    episodes: tuple[Episode, ...] | None = None
@dataclass(frozen=True)
class TVShow(Content):
    """A TV show: the shared ``Content`` metadata plus its seasons.

    ``type`` is the discriminator string ``"tv"``; the remaining fields
    default to ``None`` and are annotated as optional accordingly.
    """

    type: str = "tv"
    number_of_episodes: int | None = None
    number_of_seasons: int | None = None
    in_production: bool | None = None
    seasons: tuple[Season, ...] | None = None
+95
View File
@@ -0,0 +1,95 @@
import asyncio
import logging
from datetime import datetime
from pathlib import Path
import aiofiles
import orjson
import config
import movies.tmdb as tmdb
import movies.yandex_disk as yandex_disk
from movies.classes import Movie, TVShow
from singleton import Singleton
class MoviesDB(metaclass=Singleton):
def __init__(self, db_path: Path | str):
self.path = Path(db_path).resolve()
self.contents = []
self.by_tmdb_id: dict[int, Movie | TVShow] = {}
self.by_title: dict[str, Movie | TVShow] = {}
self.last_updated = None
def _assign_content(self):
self.by_tmdb_id = {}
self.by_title = {}
for content in self.contents:
tmdb_id = content.tmdb_id
self.by_tmdb_id[tmdb_id] = content
title, og_title = content.title, content.og_title
title = title if title else ""
og_title = og_title if og_title else ""
full_title = title + og_title
if full_title:
self.by_title[full_title] = content
async def auto_update(self):
while True:
await asyncio.sleep(config.MOVIES_DB_UPDATE_INTERVAL_SECONDS)
await self.update()
async def update(self):
logging.info("Updating Movies DB")
raw_contents = await yandex_disk.get_all_contents()
self.contents = await tmdb.fetch_all_data(raw_contents)
self.last_updated = datetime.now()
await self.save_to_disk()
self._assign_content()
logging.info("Finished updating Movies DB")
async def save_to_disk(self):
logging.info("Saving Movies DB to disk")
to_save = await asyncio.to_thread(orjson.dumps, {
"last_updated": self.last_updated,
"contents": self.contents
})
async with aiofiles.open(self.path, "wb") as file:
await file.write(to_save)
logging.info("Finished Saving Movies DB to disk")
async def load_from_disk(self):
if not self.path.exists():
return
logging.info("Loading Movies DB from disk")
async with aiofiles.open(self.path, "rb") as file:
file_content = await file.read()
if len(file_content) == 0:
return
loaded_db = await asyncio.to_thread(orjson.loads, file_content)
self.last_updated = loaded_db.get("last_updated", None)
contents = loaded_db.get("contents", [])
new_contents = []
for content in contents:
match content["type"]:
case "movie":
new_contents.append(Movie(**content))
case "tv":
new_contents.append(TVShow(**content))
case _:
continue
self.contents = new_contents
self._assign_content()
logging.info("Finished Loading Movies DB from disk")
+210
View File
@@ -0,0 +1,210 @@
import asyncio
import copy
import logging
import random
import aiohttp
from cachetools import TTLCache
from cachetools_async import cached
import config
from movies.classes import Movie, TVShow, Season, Episode
# Root of the TMDB v3 REST API; endpoint paths are appended to it.
_BASE_API_URL = "https://api.themoviedb.org/3"
# Prefix for TMDB image URLs. Stays None until _fetch_tmdb_data() stores the
# value reported by the TMDB configuration endpoint.
_BASE_IMAGE_URL = None
@cached(TTLCache(maxsize=config.CACHE_MAXSIZE, ttl=config.CACHE_TTL))
async def _fetch_tmdb_data():
    """Fetch the TMDB configuration and store the image base URL.

    The value of ``images.base_url`` from the response is written to the
    module-level ``_BASE_IMAGE_URL``. Calls are memoized by the TTL cache
    decorator.

    Raises:
        AssertionError: if TMDB answers with a status other than 200.
    """
    global _BASE_IMAGE_URL
    logging.info("Fetching TMDB configuration data")
    headers = {
        "Authorization": f"Bearer {config.TMDB_API_KEY}",
        "Accept": "application/json"
    }
    proxy = random.choice(config.PROXIES) if config.PROXIES else None
    async with aiohttp.ClientSession(proxy=proxy) as session:
        async with session.get(
            url=f"{_BASE_API_URL}/configuration",
            headers=headers,
            params={"language": config.TMDB_LANG}
        ) as response:
            # Raised explicitly rather than via `assert`, which is stripped
            # when Python runs with -O; the exception type stays the same.
            if response.status != 200:
                raise AssertionError("Failed to fetch TMDB configuration data")
            response_json = await response.json()
    _BASE_IMAGE_URL = response_json["images"]["base_url"]
@cached(TTLCache(maxsize=config.CACHE_MAXSIZE, ttl=config.CACHE_TTL))
async def _fetch_movie(movie: Movie) -> Movie:
    """Return a copy of ``movie`` enriched with its TMDB details.

    ``tmdb_id``, ``file_size`` and ``file_url`` are taken from ``movie``;
    everything else comes from the TMDB ``/movie/{id}`` response. Image URLs
    are built from the module-level ``_BASE_IMAGE_URL``. Calls are memoized
    by the TTL cache decorator.

    Raises:
        AssertionError: if TMDB answers with a status other than 200.
    """
    headers = {
        "Authorization": f"Bearer {config.TMDB_API_KEY}",
        "Accept": "application/json"
    }
    proxy = random.choice(config.PROXIES) if config.PROXIES else None
    async with aiohttp.ClientSession(proxy=proxy) as session:
        async with session.get(
            url=f"{_BASE_API_URL}/movie/{movie.tmdb_id}",
            headers=headers,
            params={"language": config.TMDB_LANG}
        ) as response:
            # Raised explicitly rather than via `assert`, which is stripped
            # when Python runs with -O; the exception type stays the same.
            if response.status != 200:
                raise AssertionError(f"Failed to fetch information for {movie.tmdb_id}")
            response_json = await response.json()

    poster_path = response_json.get("poster_path")
    backdrop_path = response_json.get("backdrop_path")
    # Genre names are capitalized and ordered shortest first.
    genre_names = sorted(
        (genre["name"].capitalize() for genre in response_json.get("genres") or []),
        key=len,
    )
    return Movie(
        tmdb_id=movie.tmdb_id,
        file_size=movie.file_size,
        file_url=movie.file_url,
        # Fall back to the title already on the entry when TMDB has none.
        title=response_json.get("title", movie.title),
        budget=response_json.get("budget"),
        runtime=response_json.get("runtime"),
        vote_average=response_json.get("vote_average"),
        adult=response_json.get("adult"),
        og_title=response_json.get("original_title"),
        homepage=response_json.get("homepage"),
        poster_url=f"{_BASE_IMAGE_URL}original{poster_path}" if poster_path else None,
        backdrop_url=f"{_BASE_IMAGE_URL}original{backdrop_path}" if backdrop_path else None,
        release_date=response_json.get("release_date"),
        genres=tuple(genre_names),
    )
@cached(TTLCache(maxsize=config.CACHE_MAXSIZE, ttl=config.CACHE_TTL))
async def _fetch_tv_show(tv_show: TVShow) -> TVShow:
    """Return a copy of ``tv_show`` enriched with its TMDB details.

    The show itself and every season present on ``tv_show`` are requested
    from TMDB. Only episodes that exist both in the TMDB season listing and
    on ``tv_show`` (matched by episode number) are kept; their ``file_url``
    comes from ``tv_show``. A season with no matching episode is returned
    with an empty episode tuple. Calls are memoized by the TTL cache
    decorator.

    Raises:
        AssertionError: if TMDB answers the show request or any season
            request with a status other than 200.
    """
    headers = {
        "Authorization": f"Bearer {config.TMDB_API_KEY}",
        "Accept": "application/json"
    }
    params = {"language": config.TMDB_LANG}
    proxy = random.choice(config.PROXIES) if config.PROXIES else None

    # Index the seasons by number; on duplicates the first one wins, which
    # is what a front-to-back search for the number would find.
    raw_seasons = {}
    for raw_season in tv_show.seasons or ():
        raw_seasons.setdefault(raw_season.season_number, raw_season)

    async with aiohttp.ClientSession(proxy=proxy) as session:
        async with session.get(
            url=f"{_BASE_API_URL}/tv/{tv_show.tmdb_id}",
            headers=headers,
            params=params
        ) as tv_response:
            # Raised explicitly rather than via `assert`, which is stripped
            # when Python runs with -O; the exception type stays the same.
            if tv_response.status != 200:
                raise AssertionError(f"Failed to fetch information for {tv_show.tmdb_id}")
            tv_response_json = await tv_response.json()

        seasons_responses_jsons = {}
        for season_number in raw_seasons:
            async with session.get(
                url=f"{_BASE_API_URL}/tv/{tv_show.tmdb_id}/season/{season_number}",
                headers=headers,
                params=params
            ) as season_response:
                # Check the season response itself, not the show response.
                if season_response.status != 200:
                    raise AssertionError(
                        f"Failed to fetch season {season_number} of {tv_show.tmdb_id}"
                    )
                seasons_responses_jsons[season_number] = await season_response.json()

    seasons = []
    for season_number, season_response_json in seasons_responses_jsons.items():
        raw_episodes = {}
        for raw_episode in raw_seasons[season_number].episodes or ():
            raw_episodes.setdefault(raw_episode.episode_number, raw_episode)

        episodes = []
        for episode_response_json in season_response_json.get("episodes", []):
            episode_number = episode_response_json["episode_number"]
            raw_episode = raw_episodes.get(episode_number)
            if raw_episode is None:
                # TMDB lists an episode that tv_show has no entry for.
                continue
            episodes.append(Episode(
                episode_number=episode_number,
                season_number=season_number,
                runtime=episode_response_json.get("runtime"),
                vote_average=episode_response_json.get("vote_average"),
                title=episode_response_json.get("name", str(episode_number)),
                file_url=raw_episode.file_url,
                episode_type=episode_response_json.get("episode_type"),
                release_date=episode_response_json.get("air_date"),
            ))

        season_poster_path = season_response_json.get("poster_path")
        seasons.append(Season(
            season_number=season_number,
            episodes_count=len(episodes),
            vote_average=season_response_json.get("vote_average"),
            title=season_response_json.get("name", str(season_number)),
            poster_url=f"{_BASE_IMAGE_URL}original{season_poster_path}" if season_poster_path else None,
            release_date=season_response_json.get("air_date"),
            episodes=tuple(episodes),
        ))

    poster_path = tv_response_json.get("poster_path")
    backdrop_path = tv_response_json.get("backdrop_path")
    # Genre names are capitalized and ordered shortest first.
    genre_names = sorted(
        (genre["name"].capitalize() for genre in tv_response_json.get("genres") or []),
        key=len,
    )
    return TVShow(
        tmdb_id=tv_show.tmdb_id,
        number_of_episodes=tv_show.number_of_episodes,
        number_of_seasons=len(seasons),
        vote_average=tv_response_json.get("vote_average"),
        adult=tv_response_json.get("adult"),
        title=tv_response_json.get("name"),
        og_title=tv_response_json.get("original_name"),
        homepage=tv_response_json.get("homepage"),
        poster_url=f"{_BASE_IMAGE_URL}original{poster_path}" if poster_path else None,
        backdrop_url=f"{_BASE_IMAGE_URL}original{backdrop_path}" if backdrop_path else None,
        release_date=tv_response_json.get("first_air_date"),
        # The TMDB TV details field is named "in_production".
        in_production=tv_response_json.get("in_production"),
        seasons=tuple(seasons),
        genres=tuple(genre_names),
    )
async def _fetch_data(content: Movie | TVShow) -> Movie | TVShow:
    """Enrich one entry via the fetcher that matches its ``type`` field.

    Raises:
        TypeError: if ``content.type`` is neither ``"movie"`` nor ``"tv"``.
    """
    logging.info("Fetching data for %s", content.tmdb_id)
    kind = content.type
    if kind == "movie":
        return await _fetch_movie(content)
    if kind == "tv":
        return await _fetch_tv_show(content)
    raise TypeError(f"Unknown content type: {kind}")
async def fetch_all_data(contents: list[Movie | TVShow]) -> list[Movie | TVShow]:
    """Enrich every entry concurrently; results keep the input order.

    The TMDB configuration is fetched first, then one ``_fetch_data``
    coroutine per entry is awaited through ``asyncio.gather``.
    """
    logging.info("Fetching data for %s contents", len(contents))
    await _fetch_tmdb_data()
    pending = list(map(_fetch_data, contents))
    return await asyncio.gather(*pending)
+104
View File
@@ -0,0 +1,104 @@
import asyncio
import copy
import itertools
import logging
from cachetools import TTLCache
from cachetools_async import cached
from yndx_disk.classes import Directory
from yndx_disk.clients import AsyncDiskClient
import config
from movies.classes import Movie, TVShow, Season, Episode
@cached(TTLCache(maxsize=config.CACHE_MAXSIZE, ttl=config.CACHE_TTL))
async def _parse_tv_show(disk_client: AsyncDiskClient, directory: Directory) -> TVShow:
    """Build a ``TVShow`` from a show directory on the disk.

    The directory name is read as ``<type>#<tmdb id>#<title>``. Each file in
    it is read as ``<season>#<episode>.<extension>``; sub-directories are
    ignored and files whose names do not fit that pattern are skipped with
    a warning. Calls are memoized by the TTL cache decorator.

    Raises:
        ValueError: if the directory name has fewer than three ``#`` parts
            or its TMDB id is not an integer.
    """
    # maxsplit=2 keeps any further "#" as part of the title.
    _file_type, tmdb_id, name = map(str.strip, directory.name.split("#", 2))
    logging.info("Parsing TV show %s", tmdb_id)

    # Keyed by the integer season number so that "01" and "1" are one season.
    episodes_by_season: dict[int, list[Episode]] = {}
    contents = await disk_client.listdir(path=directory.path, limit=10000)
    for obj in contents:
        if isinstance(obj, Directory):
            continue
        # Drop only the last extension, so extra dots in the name are fine.
        stem = obj.name.rsplit(".", 1)[0]
        try:
            season_part, episode_part = stem.split("#")
            # int() tolerates surrounding whitespace.
            season_number = int(season_part)
            episode_number = int(episode_part)
        except ValueError:
            logging.warning(
                "Skipping %r in TV show %s: unexpected file name", obj.name, tmdb_id
            )
            continue
        episodes_by_season.setdefault(season_number, []).append(
            Episode(
                episode_number=episode_number,
                season_number=season_number,
                file_url=obj.file_url
            )
        )

    seasons = [
        Season(
            season_number=season_number,
            episodes_count=len(episodes),
            episodes=tuple(episodes)
        )
        for season_number, episodes in episodes_by_season.items()
    ]
    return TVShow(
        tmdb_id=int(tmdb_id),
        number_of_episodes=sum(len(season.episodes) for season in seasons),
        number_of_seasons=len(seasons),
        title=name,
        seasons=tuple(seasons)
    )
@cached(TTLCache(maxsize=config.CACHE_MAXSIZE, ttl=config.CACHE_TTL))
async def _get_contents_on_disk(token: str, path: str) -> list[Movie]:
logging.info("Fetching contents from disk ...%s", token[-10:])
disk_client = AsyncDiskClient(token=token, auto_update_info=False)
files = await disk_client.listdir(path, limit=10000)
logging.info("Found %s files on disk ...%s", len(files), token[-10:])
movies = []
tv_shows = []
for obj in files:
file_type, tmdb_id, name = map(str.strip, obj.name.split("#"))
match file_type:
case "movie":
name, extension = map(str.strip, name.split("."))
movie = Movie(
tmdb_id=int(tmdb_id),
file_size=int(obj.size),
file_url=obj.file_url,
title=name
)
movies.append(movie)
case "tv":
pass
tv_show = await _parse_tv_show(disk_client=disk_client, directory=obj)
tv_shows.append(tv_show)
case _:
continue
logging.info("Found %s contents on disk ...%s", len(movies), token[-10:])
return movies + tv_shows
async def get_all_contents() -> list[Movie]:
    """Scan every configured disk concurrently and return one flat list.

    One ``_get_contents_on_disk`` call is made per ``(token, path)`` pair in
    ``config.YANDEX_CONFIGS``; the per-disk lists are concatenated in
    configuration order.
    """
    logging.info("Fetching all contents on all disks")
    per_disk = await asyncio.gather(
        *(_get_contents_on_disk(token, path) for token, path in config.YANDEX_CONFIGS)
    )
    merged = [entry for disk_entries in per_disk for entry in disk_entries]
    logging.info("Found %s contents on all disks", len(merged))
    return merged