Init flask / ponyorm server

master
Felix Stupp 3 years ago
commit 83b88fb89e
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

server/.gitignore vendored

@@ -0,0 +1,273 @@
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,flask,windows,linux
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,flask,windows,linux
### Flask ###
instance/*
!instance/.gitignore
.webassets-cache
.env
### Flask.Python Stack ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### Python ###
# Byte-compiled / optimized / DLL files
# C extensions
# Distribution / packaging
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
# Installer logs
# Unit test / coverage reports
# Translations
# Django stuff:
# Flask stuff:
# Scrapy stuff:
# Sphinx documentation
# PyBuilder
# Jupyter Notebook
# IPython
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
# Celery stuff
# SageMath parsed files
# Environments
# Spyder project settings
# Rope project settings
# mkdocs documentation
# mypy
# Pyre type checker
# pytype static type analyzer
# Cython debug symbols
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,flask,windows,linux
/config.yml
*.sqlite

@@ -0,0 +1,237 @@
####
## Imports
####
from __future__ import annotations
from functools import partial
import logging
import os
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, Optional, Union
from flask import Flask, jsonify, make_response, request
from flask.templating import render_template
from markupsafe import Markup
from pony.flask import Pony
from pony import orm
from entertainment_decider import common
from entertainment_decider.models import db, MediaCollection, MediaCollectionLink, MediaElement
from entertainment_decider.extractors.collection import collection_extract_uri
from entertainment_decider.extractors.media import media_extract_uri
####
## Logging Config
####
logging.basicConfig(format="%(asctime)s === %(message)s", level=logging.DEBUG)
####
## Flask Config
####
flask_app = Flask(__name__)
flask_app.config.update(dict(
CELERY = dict(
),
DEBUG = True,
PONY = dict(
provider = "sqlite",
filename = "./db.sqlite",
create_db = True,
)
))
def environ_bool(value: Union[str, bool]) -> bool:
    if isinstance(value, bool):
        return value
    stripped = value.strip().lower()
    return bool(stripped) and stripped[0] in ("1", "t", "y")
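# Illustrative examples (the first character decides truthiness, empty strings are false):
#   environ_bool("true")  -> True
#   environ_bool("Yes")   -> True
#   environ_bool("0")     -> False
#   environ_bool("")      -> False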
ConfigKeySetter = Callable[[str, Any], Any]
ConfigSingleTranslator = Callable[[Any], Any]
ConfigTranslatorIterable = Iterable[ConfigSingleTranslator]
ConfigTranslatorCreator = Callable[[str], ConfigTranslatorIterable]
def config_suffixer(setter: ConfigKeySetter, prefix: str, lower: bool = True) -> ConfigTranslatorCreator:
    def creator(key: str):
        if not key.startswith(prefix):
            raise Exception(f"Environment key {key!r} is missing prefix {prefix!r}")
        new_key = key[len(prefix):]
        new_key = new_key.lower() if lower else new_key
        return (
            partial(setter, new_key),
        )
    return creator
def celery_config_setter(key: str, val: Any):
flask_app.config["CELERY"][key] = val
celery_config_same = config_suffixer(celery_config_setter, "CELERY_")
def flask_config_setter(key: str, val: Any):
flask_app.config[key] = val
flask_config_same = config_suffixer(flask_config_setter, "FLASK_", lower=False)
def pony_config_setter(key: str, val: Any):
flask_app.config["PONY"][key] = val
pony_config_same = config_suffixer(pony_config_setter, "PONY_")
CONFIG_TRANSLATE_TABLE: Dict[str, Union[ConfigTranslatorIterable, ConfigTranslatorCreator]] = {
"CELERY_BROKER_URL": celery_config_same,
"CELERY_RESULT_BACKEND": celery_config_same,
"FLASK_DEBUG": (
environ_bool,
partial(flask_config_setter, "DEBUG"),
),
"PONY_PROVIDER": pony_config_same,
"PONY_FILENAME": pony_config_same,
"PONY_CREATE_DB": (
environ_bool,
partial(pony_config_setter, "create_db"),
),
"PONY_HOST": pony_config_same,
"PONY_DATABASE": pony_config_same,
"PONY_DB": pony_config_same,
"PONY_USER": pony_config_same,
"PONY_PASSWORD": pony_config_same,
"PONY_PASSWD": pony_config_same,
"PONY_DSN": pony_config_same,
}
for key, val in os.environ.items():
    trans = CONFIG_TRANSLATE_TABLE.get(key)
    if trans is not None:
        trans = trans(key) if callable(trans) else trans
        res: Any = val
        for caller in trans:
            new_res = caller(res)
            if new_res is not None:
                res = new_res
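# For example, with the table above the following environment variables translate to:
#   FLASK_DEBUG=1        -> flask_app.config["DEBUG"] = True
#   PONY_PROVIDER=mysql  -> flask_app.config["PONY"]["provider"] = "mysql"
#   PONY_CREATE_DB=false -> flask_app.config["PONY"]["create_db"] = False
# Each value is piped through the listed translators in order; setters return
# None, which leaves the running value untouched.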
####
## Pony init
####
db.bind(**flask_app.config["PONY"])
db.generate_mapping(create_tables=True)
Pony(flask_app)
####
## Return filters
####
@flask_app.template_filter()
def as_link(uri: str):
uri = Markup.escape(uri)
return Markup(f'<a href="{uri}">{uri}</a>')
@flask_app.template_filter()
def tenary(b: bool, true_str: str, false_str: str) -> str:
return true_str if b else false_str
####
## Routes
####
@flask_app.route("/")
def hello_world():
return '<a href="/collection">Collections</a> & <a href="/media">Media</a>'
@flask_app.route("/collection")
def list_collection():
collection_list: Iterable[MediaCollection] = MediaCollection.select().order_by(orm.desc(MediaCollection.release_date), MediaCollection.title, MediaCollection.id)
return render_template("collection_list.htm", collection_list=collection_list)
@flask_app.route("/collection/<int:collection_id>")
def show_collection(collection_id):
collection: MediaCollection = MediaCollection.get(id=collection_id)
if collection is None:
return make_response("Not found", 404)
return render_template(
"collection_element.htm",
collection=collection,
media_links=MediaCollectionLink.sorted(MediaCollectionLink.select(lambda l: l.collection == collection)),
)
@flask_app.route("/collection/<int:collection_id>", methods = ["POST"])
def update_collection(collection_id):
collection: MediaCollection = MediaCollection.get(id=collection_id)
if collection is None:
return f"Not found", 404
data: Optional[Dict] = request.get_json()
if data is None:
return f"JSON data missing", 400
for key in data.keys():
if key not in ["watch_in_order"]:
return {
"successful": False,
"error": {
"message": f"Failed to update key {key!r} as this is not allowed to update on a collection",
},
}, 400
for key, value in data.items():
if key == "watch_in_order":
collection.watch_in_order = common.update_bool_value(collection.watch_in_order, value)
collection.watch_in_order_auto = False
return {
"successful": True,
"error": None,
}, 200
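# Example request against the route above (host/port illustrative, assuming
# the Flask development server on localhost:5000):
#   curl -X POST http://localhost:5000/collection/1 \
#        -H "Content-Type: application/json" \
#        -d '{"watch_in_order": "toggle"}'
# This flips the flag via update_bool_value and disables watch_in_order_auto.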
@flask_app.route("/media")
def list_media():
media_list: Iterable[MediaElement] = MediaElement.select().order_by(orm.desc(MediaElement.release_date), MediaElement.id)
return render_template("media_list.htm", media_list=list(media_list))
@flask_app.route("/media/length")
def get_media_length():
c = MediaElement.select().count()
return f"{c}"
@flask_app.route("/media/<int:media_id>")
def show_media(media_id):
element: MediaElement = MediaElement.get(id=media_id)
if element is None:
return make_response(f"Not found", 404)
return render_template("media_element.htm", element=element)
@flask_app.route("/debug/test")
def test():
first: MediaElement = MediaElement.select().first()
return {
"data": first.to_dict(),
}, 200
# TODO add table for failed attempts so these may be resolved afterwards with increasing delays (add to MediaElement with flag "retrieved" and "extractor_cache_date" as date to resolve last try)
@flask_app.route("/api/media/list")
def api_media_list():
media_list: Iterable[MediaElement] = MediaElement.select()
return {
"status": True,
"data": [{
"id": media.id,
"title": media.title,
"release_date": media.release_date,
"length": media.length,
"progress": media.progress,
} for media in media_list],
}, 200
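# Shape of the GET /api/media/list response (values illustrative):
#   {"status": true, "data": [
#       {"id": 1, "title": "...", "release_date": "...", "length": 613, "progress": 0},
#       ...
#   ]}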

server/entertainment_decider/common.py

@@ -0,0 +1,13 @@
import subprocess
from typing import Literal, Union
def call(args, check=True, stdin=None) -> subprocess.CompletedProcess:
proc = subprocess.run(args, capture_output=True, check=check, text=True, stdin=stdin)
return proc
def update_bool_value(old_value: bool, new_value: Union[bool, Literal["toggle"]]) -> bool:
if new_value == "toggle":
return not old_value
if not isinstance(new_value, bool):
raise Exception(f"Invalid type of new_value: Expected bool or literal \"toggle\", got type={type(new_value)!r}, value={new_value!r}")
return new_value
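# Illustrative examples:
#   update_bool_value(False, True)     -> True
#   update_bool_value(True, "toggle")  -> False
#   update_bool_value(True, "yes")     -> raises Exception (only bool or "toggle")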

server/entertainment_decider/config.py

@@ -0,0 +1,7 @@
from pathlib import Path
import yaml
with Path("./config.yml").open("r") as fh:
app_config = yaml.safe_load(fh)

server/entertainment_decider/extractors/collection/__init__.py

@@ -0,0 +1,25 @@
from __future__ import annotations
from typing import Dict
from ...config import app_config
from ...models import MediaCollection
from .base import CollectionExtractor
from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
from .youtube import YouTubeCollectionExtractor
tt_rss_params = TtRssConnectionParameter(**app_config["extractors"]["tt_rss"])
COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
"tt-rss": TtRssCollectionExtractor(params=tt_rss_params, label_filter=-1033),
"youtube": YouTubeCollectionExtractor(),
}
def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection:
elem: MediaCollection = CollectionExtractor.check_uri(uri)
ex = COLLECTION_EXTRACTORS[extractor_name]
if not elem:
elem = ex.extract_and_store(uri)
else:
ex.update_object(elem, check_cache_expired=False)
return elem
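# Usage sketch (URI illustrative; requires an active orm.db_session):
#   collection = collection_extract_uri("youtube",
#       "https://www.youtube.com/playlist?list=...")
# A URI that is already known is refreshed via update_object; an unknown one
# is extracted and stored first.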

server/entertainment_decider/extractors/collection/base.py

@@ -0,0 +1,37 @@
from __future__ import annotations
import logging
from typing import Optional, TypeVar
from ...models import CollectionUriMapping, MediaCollection
from ..generic import ExtractedData, GeneralExtractor
T = TypeVar("T")
class CollectionExtractor(GeneralExtractor[MediaCollection, T]):
@staticmethod
def check_uri(uri: str) -> Optional[MediaCollection]:
mapping: CollectionUriMapping = CollectionUriMapping.get(uri=uri)
if mapping:
return mapping.element
elem: MediaCollection = MediaCollection.get(uri=uri)
if elem:
logging.warning(
f"Add missing URI mapping entry for uri {uri!r}, " +
"this should not happen at this point and is considered a bug"
)
elem.add_uris((uri,))
return elem
return None
def _create_object(self, data: ExtractedData[T]) -> MediaCollection:
collection = data.create_collection()
return collection
def _load_object(self, data: ExtractedData[T]) -> MediaCollection:
collection = data.load_collection()
collection.keep_updated = True
return collection

server/entertainment_decider/extractors/collection/tt_rss.py

@@ -0,0 +1,177 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum
from functools import partial
import logging
import re
from typing import Dict, List, Optional
import urllib.parse as url
from pony import orm # TODO remove
from tinytinypy import Connection
from tinytinypy.main import Headline
from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError
from .base import CollectionExtractor
from ..media import media_extract_uri
logger = logging.getLogger(__name__)
@dataclass
class TtRssConnectionParameter:
host: str
username: str
password: str
proto: str = "https"
endpoint: str = "/api/"
TT_RSS_CONNECTION: Optional[Connection] = None
HeadlineList = List[Headline]
def _build_connection(params: TtRssConnectionParameter) -> Connection:
global TT_RSS_CONNECTION
if TT_RSS_CONNECTION is None:
TT_RSS_CONNECTION = Connection(proto=params.proto, host=params.host, endpoint=params.endpoint)
if not TT_RSS_CONNECTION.isLoggedIn():
TT_RSS_CONNECTION.login(username=params.username, password=params.password)
return TT_RSS_CONNECTION
def get_headlines(params: TtRssConnectionParameter, **kwargs) -> HeadlineList:
conn = _build_connection(params)
if "limit" in kwargs:
kwargs["limit"] = int(kwargs["limit"])
if logger.isEnabledFor(logging.DEBUG):
logger.debug(f"Request headlines from tt-rss: {kwargs!r}")
headlines = conn.getHeadlines(**kwargs)
logger.debug(f"Got {len(headlines)} headlines from tt-rss using: {kwargs!r}")
return headlines
class TtRssUriKind(Enum):
ALL = ("all", lambda id: get_headlines)
CATEGORY = ("category", lambda id: partial(get_headlines, cat_id=id))
FEED = ("feed", lambda id: partial(get_headlines, feed_id=id))
@property
def path_name(self) -> str:
return self.value[0]
@property
def request(self):
return self.value[1]
@classmethod
def from_path_name(cls, name: str) -> "TtRssUriKind":
for e in cls:
if e.path_name.lower() == name.lower():
return e
raise KeyError(name)
@dataclass
class TtRssUri:
supported_kinds = '|'.join(re.escape(n.path_name.lower()) for n in TtRssUriKind)
scheme = "tt-rss"
path_re = re.compile(fr"^/((?P<all>all)|(?P<kind>{supported_kinds})/(?P<id>-?\d+))/?$")
kind: TtRssUriKind
id: Optional[str]
options: Dict[str, str]
@classmethod
def from_str_uri(cls, uri: str) -> "TtRssUri":
parts = url.urlparse(uri, scheme=cls.scheme)
if parts.scheme != cls.scheme:
raise Exception(f"Invalid scheme for tt-rss uri: {parts.scheme!r}")
if (parts.netloc, parts.params, parts.fragment) != ("", "", ""):
raise Exception(f"tt-rss uris do not accept netloc, params and fragments")
m = cls.path_re.search(parts.path)
if m is None:
raise Exception(f"Could not parse path of tt-rss uri: {parts.path!r}")
return TtRssUri(
kind = TtRssUriKind.ALL if m.group("all") else TtRssUriKind.from_path_name(m.group("kind")),
id = m.group("id"),
options = {single[0]: single[1] for single in (single.split("=") for single in parts.query.split("&"))} if parts.query else {},
)
def request(self, params: TtRssConnectionParameter, **kwargs) -> HeadlineList:
return self.kind.request(self.id)(params, **self.options, **kwargs)
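# URI forms accepted by path_re above (values illustrative):
#   tt-rss:///all               -> kind=ALL
#   tt-rss:///feed/42?limit=10  -> kind=FEED, id="42", options={"limit": "10"}
#   tt-rss:///category/-1033    -> kind=CATEGORY, id="-1033"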
class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
__params: TtRssConnectionParameter
__label_filter: Optional[int]
__mark_as_read: bool
def __init__(self,
params: TtRssConnectionParameter,
mark_as_read: bool = False,
label_filter: Optional[int] = None,
):
super().__init__("tt-rss")
self.__params = params
self.__label_filter = label_filter
self.__mark_as_read = mark_as_read
def __decode_uri(self, uri: str) -> TtRssUri:
return TtRssUri.from_str_uri(uri)
def can_extract_offline(self, uri: str, cache: Dict = None) -> bool:
return True
def _cache_expired(self, date: datetime) -> bool:
    # a cache older than 4 hours counts as expired
    return (datetime.now() - date) > timedelta(hours=4)
def _extract_offline_only(self, uri: str, cache: Dict = None) -> ExtractedData[HeadlineList]:
return ExtractedData(
extractor_name=self.name,
object_key=uri,
object_uri=uri,
cache=cache,
)
def _extract_online(self, uri: str, cache: Dict = None) -> ExtractedData[HeadlineList]:
rss_uri = self.__decode_uri(uri)
logging.info(f"Extract collection from tt-rss: {uri!r}")
data = rss_uri.request(self.__params, order_by="feed_dates", view_mode="unread")
if self.__label_filter is not None:
logger.debug(f"tt-rss headline labels: {[headline.labels for headline in data]!r}")
data = [
headline for headline in data
if self.__label_filter in (label_marker[0] for label_marker in headline.labels)
]
if self.__mark_as_read:
parameters = {
"article_ids": ",".join(str(headline.feedId) for headline in data),
"field": "2", # unread
"mode": "0", # false
}
raise NotImplementedError("Cannot set articles as read with tinytinypy for now") # TODO
return ExtractedData(
extractor_name=self.name,
object_key=uri,
object_uri=uri,
data=data,
)
def _update_object_raw(self, object: MediaCollection, data: HeadlineList):
    if not object.title:
        object.title = object.uri
    logger.debug(f"Got {len(data)} headlines")
    for headline in data:
        logger.debug(f"Add to collection {headline.url!r}")
        try:
            object.add_episode(media_extract_uri("ytdl", headline.url))
            orm.commit()
        except ExtractionError:
            logger.warning(f"Failed while extracting media {headline.url!r}", exc_info=True)
    if object.watch_in_order_auto:
        object.watch_in_order = False # no order available

server/entertainment_decider/extractors/collection/youtube.py

@@ -0,0 +1,105 @@
from __future__ import annotations
from datetime import datetime, timedelta
import logging
import re
from typing import Dict
from pony import orm # TODO remove
import youtubesearchpython
from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError
from .base import CollectionExtractor
from ..media import media_extract_uri
class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
__uri_regex = re.compile(r"^https?://(www\.)?youtube\.com/(channel/|playlist\?list=)(?P<id>[^/&?]+)")
@classmethod
def __get_id(cls, uri: str) -> str:
m = cls.__uri_regex.search(uri)
if not m:
raise Exception(f"Failed to parse Youtube collection uri {uri!r}")
return m.group("id")
@staticmethod
def __is_channel_id(collection_id: str) -> bool:
return collection_id.startswith("UC") or collection_id.startswith("UU")
@staticmethod
def __convert_channel_id(channel_id: str) -> str:
if channel_id.startswith("UU"):
return channel_id
if channel_id.startswith("UC"):
return f"UU{channel_id[2:]}"
raise Exception(f"Got not valid channel id: {channel_id!r}")
@classmethod
def __convert_if_required(cls, collection_id: str) -> str:
if cls.__is_channel_id(collection_id):
return cls.__convert_channel_id(collection_id)
return collection_id
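# YouTube convention: a channel id ("UC...") and the channel's auto-generated
# uploads playlist ("UU...") share the same suffix, so converting
# "UC<suffix>" -> "UU<suffix>" lets a channel URI be crawled like any
# ordinary playlist.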
def __init__(self):
super().__init__("youtube")
def can_extract_offline(self, uri: str, cache: Dict = None) -> bool:
return True
def _cache_expired(self, date: datetime) -> bool:
    # a cache older than 4 hours counts as expired
    return (datetime.now() - date) > timedelta(hours=4)
def _extract_offline_only(self, uri: str, cache: Dict = None) -> ExtractedData[Dict]:
playlist_id = self.__convert_if_required(self.__get_id(uri))
return ExtractedData(
extractor_name=self.name,
object_key=playlist_id,
object_uri=uri,
cache=cache,
)
def _extract_online(self, uri: str, cache: Dict = None) -> ExtractedData[Dict]:
playlist_id = self.__convert_if_required(self.__get_id(uri))
playlist_link = f"https://www.youtube.com/playlist?list={playlist_id}"
logging.info(f"Request Youtube playlist {playlist_link!r}")
playlist = youtubesearchpython.Playlist(playlist_link)
while playlist.hasMoreVideos:
playlist.getNextVideos()
logging.debug(f"Retrieved {len(playlist.videos)} videos from playlist {playlist_link!r}")
return ExtractedData(
extractor_name=self.name,
object_key=playlist_id,
object_uri=uri,
data={
"info": playlist.info["info"],
"videos": playlist.videos,
},
)
def _update_object_raw(self, object: MediaCollection, data: Dict):
info = data["info"]
object.title = f"{info['title']} ({info['channel']['name']})"
object.add_uris((info["link"],))
video_list = data["videos"]
is_channel = self.__is_channel_id(info["id"])
if object.watch_in_order_auto:
object.watch_in_order = not is_channel
len_video_list = len(video_list)
if is_channel:
video_list = reversed(video_list)
for index, video in enumerate(video_list):
video_url = f"https://www.youtube.com/watch?v={video['id']}"
other_urls = [
f"https://youtube.com/watch?v={video['id']}",
f"https://youtu.be/{video['id']}",
]
logging.debug(f"[youtube] Add to collection {object.title!r} video {video_url!r} ({index+1} of {len_video_list})")
try:
element = media_extract_uri("ytdl", video_url)
element.add_uris(other_urls)
object.add_episode(element, episode=index+1)
orm.commit() # so progress is stored
except ExtractionError:
logging.warning(f"Failed while extracting media {video_url!r}", exc_info=True)

server/entertainment_decider/extractors/generic.py

@@ -0,0 +1,148 @@
from __future__ import annotations
import dataclasses
from dataclasses import dataclass
from datetime import datetime
import logging
from typing import Dict, Generic, Optional, TypeVar
from ..models import MediaCollection, MediaElement
T = TypeVar("T")
class ExtractionError(Exception):
pass
@dataclass
class ExtractedDataLight:
object_uri: str
extractor_name: str
object_key: str
def create_media(self) -> MediaElement:
return MediaElement(
uri = self.object_uri,
extractor_name = self.extractor_name,
extractor_key = self.object_key,
)
def create_collection(self) -> MediaCollection:
return MediaCollection(
uri = self.object_uri,
extractor_name = self.extractor_name,
extractor_key = self.object_key
)
@dataclass
class ExtractedData(ExtractedDataLight, Generic[T]):
data: T = dataclasses.field(default=None, repr=False, compare=False)
cache: Dict = dataclasses.field(default=None, repr=False, compare=False)
@property
def has_data(self) -> bool:
return self.data is not None
def load_media(self) -> MediaElement:
return MediaElement.get(extractor_name=self.extractor_name, extractor_key=self.object_key)
def load_collection(self) -> MediaCollection:
return MediaCollection.get(extractor_name=self.extractor_name, extractor_key=self.object_key)
@dataclass
class AuthorExtractedData(ExtractedDataLight):
author_name: str
@property
def is_valid(self) -> bool:
    return all(v is not None for v in self.__dict__.values())
E = TypeVar("E", MediaElement, MediaCollection)
class GeneralExtractor(Generic[E, T]):
name: str
def __init__(self, name: str):
self.name = name
# abstract (for media & collection base classes)
@staticmethod
def check_uri(uri: str) -> Optional[E]:
raise NotImplementedError()
def _create_object(self, data: ExtractedData[T]) -> E:
raise NotImplementedError()
def _load_object(self, data: ExtractedData[T]) -> E:
raise NotImplementedError()
# abstract (for specific extractor classes)
#def uri_suitable(self, uri: str) -> bool:
# raise NotImplementedError()
def can_extract_offline(self, uri: str, cache: Dict = None) -> bool:
return False
def _cache_expired(self, date: datetime) -> bool:
return False
def _extract_offline_only(self, uri: str, cache: Dict = None) -> ExtractedData[T]:
raise NotImplementedError()
def _extract_online(self, uri: str, cache: Dict = None) -> ExtractedData[T]:
raise NotImplementedError()
def _update_object_raw(self, object: E, data: T):
raise NotImplementedError()
def _update_hook(self, object: E, data: ExtractedData[T]):
return None
# defined
def _extract_offline(self, uri: str, cache: Dict = None) -> ExtractedData[T]:
return self._extract_offline_only(uri, cache) if self.can_extract_offline(uri, cache) else self._extract_online(uri, cache)
def _extract_required(self, data: ExtractedData[T]) -> ExtractedData[T]:
if data.has_data:
return data
return self._extract_online(data.object_uri, data.cache)
def _update_object(self, object: E, data: ExtractedData[T]) -> E:
object.extractor_cache = data.cache
object.uri = data.object_uri
object.add_uris((data.object_uri,))
self._update_object_raw(object, data.data)
self._update_hook(object, data)
return object
def update_object(self, object: E, check_cache_expired: bool = True) -> E:
if object.extractor_cache_date and check_cache_expired and not self._cache_expired(object.extractor_cache_date):
return object
data = self._extract_online(object.uri, object.extractor_cache)
logging.debug(f"Updating info for media: {data!r}")
return self._update_object(object, data)
def store_object(self, data: ExtractedData[T]) -> E:
object = self._load_object(data)
if object:
logging.debug(f"Found object already in database: {data!r}")
return object
data = self._extract_required(data)
logging.debug(f"Store info for object: {data!r}")
object = self._create_object(data)
return self._update_object(object, data)
def extract_and_store(self, uri: str) -> E:
object = self.check_uri(uri)
if object is not None:
return object
return self.store_object(self._extract_offline(uri))
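# Call flow implemented above (sketch):
#   extract_and_store(uri)
#     -> check_uri(uri)               # reuse the object if the URI is known
#     -> _extract_offline(uri)        # cheap key extraction when supported
#     -> store_object(data)
#          -> _load_object(data)      # lookup by (extractor_name, object_key)
#          -> _extract_required(data) # go online only if data is still missing
#          -> _create_object(data), then _update_object(object, data)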

server/entertainment_decider/extractors/media/__init__.py

@@ -0,0 +1,18 @@
from __future__ import annotations
from typing import Dict
from ...models import MediaElement
from .base import MediaExtractor
from .ytdl import YtdlMediaExtractor
MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
"ytdl": YtdlMediaExtractor(),
}
def media_extract_uri(extractor_name: str, uri: str) -> MediaElement:
elem: MediaElement = MediaExtractor.check_uri(uri)
if not elem:
elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri)
return elem

server/entertainment_decider/extractors/media/base.py

@@ -0,0 +1,75 @@
from __future__ import annotations
import logging
from typing import Dict, Optional, TypeVar
from ...models import MediaCollection, MediaElement, MediaUriMapping
from ..generic import AuthorExtractedData, ExtractedData, GeneralExtractor
from ..collection.base import CollectionExtractor
T = TypeVar("T")
class MediaExtractor(GeneralExtractor[MediaElement, T]):
# abstract
def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
return None
# defined
@staticmethod
def check_uri(uri: str) -> Optional[MediaElement]:
mapping: MediaUriMapping = MediaUriMapping.get(uri=uri)
if mapping:
return mapping.element
elem: MediaElement = MediaElement.get(uri=uri)
if elem:
logging.warning(
f"Add missing URI mapping entry for uri {uri!r}, " +
"this should not happen at this point and is considered a bug"
)
elem.add_uris((uri,))
return elem
return None
def _create_object(self, data: ExtractedData[T]) -> MediaElement:
return data.create_media()
def _load_object(self, data: ExtractedData[T]) -> MediaElement:
return data.load_media()
def _create_author_collection(self, author_data: AuthorExtractedData) -> MediaCollection:
collection = author_data.create_collection()
collection.add_uris((author_data.object_uri,))
collection.keep_updated = False
collection.watch_in_order = False
return collection
def _lookup_author_collection(self, author_data: AuthorExtractedData) -> Optional[MediaCollection]:
return CollectionExtractor.check_uri(
uri=author_data.object_uri,
) or MediaCollection.get(
extractor_name=author_data.extractor_name,
extractor_key=author_data.object_key,
)
def _get_author_collection(self, author_data: AuthorExtractedData) -> MediaCollection:
collection = self._lookup_author_collection(author_data)
if collection is None:
collection = self._create_author_collection(author_data)
if not collection.title or collection.title.startswith(f"(author:{author_data.extractor_name}) "):
collection.title = f"(author:{author_data.extractor_name}) {author_data.author_name}"
return collection
def _add_to_author_collection(self, element: MediaElement, data: Dict):
author_data = self._get_author_data(data)
if author_data is None or not author_data.is_valid:
return
collection = self._get_author_collection(author_data)
collection.add_episode(element)
def _update_hook(self, object: MediaElement, data: ExtractedData[T]):
self._add_to_author_collection(object, data.data)
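# Author handling sketch: extractors that report an author (e.g. ytdl's
# channel/uploader data) get a synthetic "(author:<extractor>) <name>"
# collection, grouping all elements of the same uploader without marking
# the collection keep_updated or watch_in_order.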

server/entertainment_decider/extractors/media/ytdl.py

@@ -0,0 +1,93 @@
from __future__ import annotations
import json
from datetime import datetime
import logging
import subprocess
from typing import Dict, List, Optional
from jsoncache import ApplicationCache
from ...common import call
from ...models import MediaElement
from ..generic import AuthorExtractedData, ExtractedData, ExtractionError
from .base import MediaExtractor
cache = ApplicationCache(app_name="entertainment-decider-ytdl", create_cache_dir=True, default_max_age=7*86400)
cache.clean_cache()
YTDL_CALL = [
"yt-dlp",
]
class YtdlErrorException(subprocess.CalledProcessError):
pass
def ytdl_call(args: List[str]) -> dict:
proc = call(YTDL_CALL + args, check=False)
if proc.returncode != 0:
raise YtdlErrorException(
returncode=proc.returncode,
cmd=args,
output=proc.stdout,
stderr=proc.stderr,
)
return json.loads(proc.stdout.strip())
@cache.cache_json()
def get_video_info(uri: str) -> dict:
return ytdl_call([
"--no-playlist",
"--dump-json",
uri,
])
@cache.cache_json()
def get_playlist_info(uri: str) -> dict:
    # ytdl_call expects an argument list; dump the whole playlist as a single JSON document
    return ytdl_call([
        "--dump-single-json",
        uri,
    ])
class YtdlMediaExtractor(MediaExtractor[Dict]):
def __init__(self):
super().__init__("ytdl")
def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
video_extractor_key = data.get("extractor_key") or data["ie_key"]
author_key = data.get("channel_id") or data.get("uploader_id")
author_name = data.get("channel") or data.get("uploader") or data.get("uploader_id")
return AuthorExtractedData(
object_uri = data.get("channel_url") or data.get("uploader_url"),
extractor_name = self.name,
object_key = f"author:{video_extractor_key}:{author_key}" if author_key else None,
author_name = f"{video_extractor_key}: {author_name}" if author_name else None,
)
def _extract_online(self, uri: str, cache: Dict) -> ExtractedData[Dict]:
if cache:
logging.debug(f"Use preloaded cache to get infos of video {uri!r}")
vid_data = cache
else:
logging.info(f"Request info using youtube-dl for {uri!r}")
try:
vid_data = get_video_info(uri)
except YtdlErrorException as e:
raise ExtractionError from e
if vid_data.get("is_live", False):
raise ExtractionError("Video is live, so pass extraction")
ytdl_extractor_key = vid_data.get("extractor_key") or vid_data["ie_key"]
ytdl_video_id = vid_data["id"]
return ExtractedData[Dict](
object_uri=uri,
extractor_name=self.name,
object_key=f"{ytdl_extractor_key}:{ytdl_video_id}",
data=vid_data,
cache=None,
)
def _update_object_raw(self, object: MediaElement, data: Dict):
object.title = f"{data['title']} - {data['uploader']}" if "uploader" in data else data["title"]
object.release_date = datetime.strptime(data["upload_date"], "%Y%m%d")
object.length = int(data["duration"])
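# The fields consumed above ("title", "uploader", "upload_date", "duration")
# come from the JSON emitted by --dump-json; upload_date is formatted as
# YYYYMMDD and duration is in seconds.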

server/entertainment_decider/models.py

@@ -0,0 +1,308 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
import logging
from typing import Dict, Iterable, List, Optional, Set
from pony import orm
db = orm.Database()
####
## Model Extensions
####
@dataclass
class CollectionStats:
to_watch_count: int
ignored_count: int # but not watched
watched_count: int
to_watch_seconds: int
ignored_seconds: int # but not watched
watched_seconds: int
@property
def full_count(self) -> int:
return self.to_watch_count + self.ignored_count + self.watched_count
@property
def full_seconds(self) -> int:
return self.to_watch_seconds + self.ignored_seconds + self.watched_seconds
@classmethod
def from_collection(cls, collection: MediaCollection) -> CollectionStats:
to_watch_count = 0
ignored_count = 0
watched_count = 0
to_watch_seconds = 0
ignored_seconds = 0
watched_seconds = 0
for link in collection.media_links:
media = link.element
if media.watched:
watched_count += 1
watched_seconds += media.length
else:
watched_seconds += media.progress
if media.ignored:
ignored_count += 1
ignored_seconds += media.left_length
else:
to_watch_count += 1
to_watch_seconds += media.left_length
return CollectionStats(
to_watch_count=to_watch_count,
ignored_count=ignored_count,
watched_count=watched_count,
to_watch_seconds=to_watch_seconds,
ignored_seconds=ignored_seconds,
watched_seconds=watched_seconds,
)
####
## Models
####
class Tag(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
title: str = orm.Required(str)
notes: str = orm.Optional(str)
use_for_preferences: bool = orm.Required(bool, default=True)
_collection_list: Iterable[MediaCollection] = orm.Set(lambda: MediaCollection)
_media_list: Iterable[MediaElement] = orm.Set(lambda: MediaElement)
class MediaCollectionLink(db.Entity):
collection: MediaCollection = orm.Required(lambda: MediaCollection)
element: MediaElement = orm.Required(lambda: MediaElement)
orm.PrimaryKey(collection, element)
season: int = orm.Required(int, default=0)
episode: int = orm.Required(int, default=0)
orm.composite_index(season, episode)
@property
def element_id(self):
return self.element.id
@property
def element_release_date(self):
return self.element.release_date
@staticmethod
def sorted(iterable: Iterable[MediaCollectionLink]) -> List[MediaCollectionLink]:
return sorted(iterable, key=lambda m: (m.season, m.episode, m.element_release_date, m.element_id))
natural_order = (season, episode, element_release_date, element_id) # unusable due to ponyorm, see https://github.com/ponyorm/pony/issues/612
class MediaElement(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
uri: str = orm.Required(str, unique=True)
title: str = orm.Optional(str)
notes: str = orm.Optional(str)
release_date: datetime = orm.Optional(datetime)
extractor_name: str = orm.Required(str)
extractor_key: str = orm.Required(str)
orm.composite_key(extractor_name, extractor_key)
_extractor_cache: Dict = orm.Optional(orm.Json, nullable=True)
extractor_cache_date: datetime = orm.Optional(datetime)
watched: bool = orm.Required(bool, default=False)
ignored: bool = orm.Required(bool, default=False)
progress: int = orm.Required(int, default=0)
length: int = orm.Optional(int)
tag_list : Iterable[Tag] = orm.Set(lambda: Tag)
_uris: Iterable[MediaUriMapping] = orm.Set(lambda: MediaUriMapping)
collection_links: Iterable[MediaCollectionLink] = orm.Set(lambda: MediaCollectionLink)
def extractor_cache_valid(self, max_age: timedelta):
return (datetime.now() - self.extractor_cache_date) < max_age
def __get_cache(self):
return self._extractor_cache
def __set_cache(self, cache: Dict):
self._extractor_cache = cache
self.extractor_cache_date = datetime.now()
extractor_cache = property(__get_cache, __set_cache)
@property
def left_length(self) -> int:
return self.length - self.progress
@property
def ignored_recursive(self) -> bool:
links = orm.select(link for link in MediaCollectionLink if link.element == self and link.collection.ignored == True)
return len(links) > 0
@property
def ignored_any(self) -> bool:
return self.ignored or self.ignored_recursive
@property
def skip_over(self) -> bool:
return self.ignored or self.watched
@property
def can_considered(self) -> bool:
if self.skip_over:
return False
for link in self.collection_links:
if link.collection.watch_in_order and self != link.collection.next_episode.element:
return False
return True
@property
def inherited_tags(self) -> Set[Tag]:
result = set()
for link in self.collection_links:
result |= link.collection.all_tags
return result
@property
def all_tags(self) -> Iterable[Tag]:
return set(self.tag_list) | self.inherited_tags
def merge_to(self, other: MediaElement):
if self.watched:
other.watched = True
if self.ignored:
other.ignored = True
if self.progress > 0 and other.progress <= 0:
other.progress = self.progress
for uri_map in self._uris:
uri_map.element = other
for link in self.collection_links:
if not MediaCollectionLink.get(collection=link.collection, element=other):
link.element = other
self.delete() # will also delete still existing uri mappings and collection links
orm.flush()
def add_uris(self, uri_list: Iterable[str]):
for uri in set(uri_list):
mapping: MediaUriMapping = MediaUriMapping.get(uri=uri)
if not mapping:
logging.debug(f"Add URI mapping {uri!r} to media {self.id!r}")
MediaUriMapping(
uri=uri,
element=self,
)
continue
if mapping.element != self:
raise Exception(f"URI duplicated for two different media's: {uri}") # TODO may replace with merge call
orm.flush()
@property
def info_link(self):
return f"/media/{self.id}"
class MediaUriMapping(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
uri: str = orm.Required(str, unique=True)
element: MediaElement = orm.Required(MediaElement)
class MediaCollection(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
uri: str = orm.Required(str, unique=True)
title: str = orm.Optional(str)
notes: str = orm.Optional(str)
release_date: datetime = orm.Optional(datetime)
extractor_name: str = orm.Required(str)
extractor_key: str = orm.Required(str)
orm.composite_key(extractor_name, extractor_key)
_extractor_cache: Dict = orm.Optional(orm.Json, nullable=True)
extractor_cache_date: datetime = orm.Optional(datetime)
keep_updated: bool = orm.Required(bool, default=False)
watch_in_order_auto: bool = orm.Required(bool, default=True)
ignored: bool = orm.Required(bool, default=False)
watch_in_order: bool = orm.Required(bool, default=True)
tag_list: Iterable[Tag] = orm.Set(lambda: Tag)
_uris: Iterable[CollectionUriMapping] = orm.Set(lambda: CollectionUriMapping)
media_links: Iterable[MediaCollectionLink] = orm.Set(MediaCollectionLink)
def extractor_cache_valid(self, max_age: timedelta):
return (datetime.now() - self.extractor_cache_date) < max_age
def __get_cache(self):
return self._extractor_cache
def __set_cache(self, cache: Dict):
self._extractor_cache = cache
self.extractor_cache_date = datetime.now()
extractor_cache = property(__get_cache, __set_cache)
@property
def next_episode(self) -> Optional[MediaCollectionLink]:
#return orm \
# .select(link for link in self.media_links if not link.element.watched) \
# .order_by(*MediaCollectionLink.natural_order) \
# .first()
episodes = MediaCollectionLink.sorted(orm.select(link for link in self.media_links if not link.element.watched and not link.element.ignored))
return episodes[0] if len(episodes) > 0 else None
@property
def completed(self) -> bool:
return self.next_episode is None
@property
def all_tags(self) -> Iterable[Tag]:
return self.tag_list
@property
def stats(self) -> CollectionStats:
return CollectionStats.from_collection(self)
def add_episode(self, media: MediaElement, season: int = 0, episode: int = 0) -> MediaCollectionLink:
link: MediaCollectionLink = MediaCollectionLink.get(collection=self, element=media)
if link is None:
link = MediaCollectionLink(collection=self, element=media)
link.season, link.episode = season, episode
orm.flush()
return link
def add_uris(self, uri_list: Iterable[str]):
for uri in set(uri_list):
mapping: CollectionUriMapping = CollectionUriMapping.get(uri=uri)
if not mapping:
logging.debug(f"Add URI mapping {uri!r} to collection {self.id!r}")
CollectionUriMapping(
uri=uri,
element=self,
)
continue
if mapping.element != self:
raise Exception(f"URI duplicated for two different collections's: {uri}") # TODO may replace with merge call
orm.flush()
@property
def info_link(self):
return f"/collection/{self.id}"
class CollectionUriMapping(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
uri: str = orm.Required(str, unique=True)
element: MediaCollection = orm.Required(MediaCollection)
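# Usage sketch (after db.bind()/db.generate_mapping(), as done by the Flask
# app; illustrative only):
#   with orm.db_session:
#       pending = MediaElement.select(lambda m: not m.watched and not m.ignored)
#       for media in pending:
#           print(media.title, media.left_length)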

server/requirements.txt

@@ -0,0 +1,10 @@
Flask[async]>=2.0.1
pony>=0.7.14
pyyaml>=5.4.1
rss-parser>=0.2.3
youtube-dl>=2021.6.6
youtube-search-python>=1.4.9
# custom, local requirements; TODO remove or change to persistent dependency
/home/zocker/Repositories/python-jsoncache
/home/zocker/Repositories/tinytinypy

server/templates/collection_element.htm

@@ -0,0 +1,63 @@
<!DOCTYPE html>
<html>
{% set title = collection.title %}
<head>
<meta charset="utf-8"/>
<title>{{ title }}</title>
<style>
table tr th, table tr td {
margin: 0;
padding: .2em;
border: solid black 1px;
}
</style>
</head>
<body>
<a href="/collection">&lt;- back to list</a>
<h1>{{ title }}</h1>
<h2>Properties</h2>
<ul>
<li>Watch In Order: {{ collection.watch_in_order | tenary("Yes", "No") }} {%- if collection.watch_in_order_auto %} (automatic){% endif %}</li>
<li>Keep Updated: {{ collection.keep_updated | tenary("Yes", "No") }}</li>
{% if collection.watch_in_order %}
<li>
Next Episode:
{% set link = collection.next_episode %}
{% if link %}
<a href="{{ link.element.info_link }}">{{ link.element.title }}</a>
{%- if link.season != 0 -%}
, Season {{ link.season }}
{% endif %}
{%- if link.episode != 0 -%}
, Episode {{ link.episode }}
{% endif %}
{% else %}
no next episode
{% endif %}
</li>
{% endif %}
</ul>
<h2>Notes</h2>
<pre>{{ collection.notes or "" }}</pre>
<h2>Episodes</h2>
<ul>
{% for link in media_links %}
<li>
<a href="{{ link.element.info_link }}">{{ link.element.title }}</a>
{%- if link.season != 0 -%}
, Season {{ link.season }}
{% endif %}
{%- if link.episode != 0 -%}
, Episode {{ link.episode }}
{% endif %}
</li>
{% endfor %}
</ul>
<h2>Links</h2>
<ul>
{% for link in collection._uris %}
<li>{{ link.uri | as_link }} {% if collection.uri == link.uri %}*{% endif %}</li>
{% endfor %}
</ul>
</body>
</html>

server/templates/collection_list.htm

@@ -0,0 +1,43 @@
<!DOCTYPE html>
<html>
{% set title = collection_list | length | string + " Collections known" %}
<head>
<meta charset="utf-8"/>
<title>{{ title }}</title>
<style>
table tr th, table tr td {
margin: 0;
padding: .2em;
border: solid black 1px;
}
</style>
</head>
<body>
<h1>{{ title }}</h1>
<table>
<tr>
<th>Title</th>
<th>Date</th>
<th>Count</th>
<th>Watched</th>
<th>To Watch</th>
</tr>
{% for collection in collection_list %}
{% set stats = collection.stats %}
<tr>
<td><a href="{{ collection.info_link }}">{{ collection.title }}</a></td>
<td>
{% if collection.release_date %}
{{ collection.release_date.strftime("%d.%m.%Y") }}
{% else %}
unknown
{% endif %}
</td>
<td>{{ stats.full_count }}</td>
<td>{{ stats.watched_count }}</td>
<td>{{ stats.to_watch_count }}</td>
</tr>
{% endfor %}
</table>
</body>
</html>

server/templates/media_element.htm

@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
{% set title = element.title %}
<head>
<meta charset="utf-8"/>
<title>{{ title }}</title>
<style>
table tr th, table tr td {
margin: 0;
padding: .2em;
border: solid black 1px;
}
</style>
</head>
<body>
<a href="/media">&lt;- back to list</a>
<h1>{{ title }}</h1>
<h2>Notes</h2>
<pre>{{ element.notes or "" }}</pre>
<h2>Properties</h2>
<ul>
<li>Can be considered: {{ element.can_considered | tenary("Yes", "No") }}</li>
</ul>
<h2>Part of Collections</h2>
<ul>
{% for link in element.collection_links %}
<li>
<a href="{{ link.collection.info_link }}">{{ link.collection.title }}</a>
{%- if link.season != 0 -%}
, Season {{ link.season }}
{% endif %}
{%- if link.episode != 0 -%}
, Episode {{ link.episode }}
{% endif %}
</li>
{% endfor %}
</ul>
<h2>Links</h2>
<ul>
{% for link in element._uris %}
<li>{{ link.uri | as_link }} {% if element.uri == link.uri %}*{% endif %}</li>
{% endfor %}
</ul>
</body>
</html>

server/templates/media_list.htm

@@ -0,0 +1,53 @@
<!DOCTYPE html>
<html>
{% set title = media_list | length | string + " Videos known" %}
<head>
<meta charset="utf-8"/>
<title>{{ title }}</title>
<style>
table tr th, table tr td {
margin: 0;
padding: .2em;
border: solid black 1px;
}
</style>
</head>
<body>
<h1>{{ title }}</h1>
<ul>
<li>
Full length:
{{ media_list | map(attribute='length') | sum }}
seconds
</li>
</ul>
<table>
<tr>
<th>Title</th>
<th>Date</th>
<th>Progress</th>
<th>Length</th>
<th>Consider</th>
<th>Link</th>
</tr>
{% for media in media_list %}
<tr>
<td><a href="{{ media.info_link }}">{{ media.title }}</a></td>
<td>
{% if media.release_date %}
{{ media.release_date.strftime("%d.%m.%Y") }}
{% else %}
unknown
{% endif %}
</td>
<td>
{% if media.watched %}
completed
{% elif media.progress <= 0 %}
not started
{% else %}
{{ media.progress }}&nbsp;s
{% endif %}
</td>
<td>{{ media.length }}&nbsp;s</td>
<td>{{ media.can_considered | tenary("Yes", "No") }}</td>
<td><a href="{{ media.uri }}">link</a></td>
</tr>
{% endfor %}
</table>
</body>
</html>