Split models/entities into multiple files

master
Felix Stupp 1 year ago
parent a3ea1548aa
commit 44a630c074
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

__init__.py
@@ -1,3 +1,8 @@
+from .cache_tables import (
+    setup_custom_tables,
+    update_element_lookup_cache,
+)
 from .custom_types import (
     Query,
     SafeStr,
@@ -15,12 +20,7 @@ from .entities import (
     Tag,
     Tagable,
     TagKey,
-    are_multiple_considered,
     db,
-    get_all_considered,
-    get_all_elements_tags_recursive,
-    setup_custom_tables,
-    update_element_lookup_cache,
 )
 from .predefined_tags import (
@@ -29,6 +29,12 @@ from .predefined_tags import (
     predefined_video_tag,
 )
+from .sql_speedups import (
+    are_multiple_considered,
+    get_all_considered,
+    get_all_elements_tags_recursive,
+)
 from .thumbnails import (
     thumbnail_sort_key,
 )
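
Note: because the package `__init__` keeps re-exporting every moved helper, downstream imports are unaffected by the split. A minimal sketch of a call site, assuming the package is importable as `models` (the package name is inferred from the commit title and is an assumption):

    # hypothetical call site; only the `models` package name is assumed
    from models import get_all_considered, update_element_lookup_cache

Both names resolve as before, now via .sql_speedups and .cache_tables instead of .entities.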

cache_tables.py (new file)
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+import logging
+from typing import (
+    List,
+    Mapping,
+)
+
+from pony import orm
+
+from .custom_types import SafeStr
+from .entities import (
+    db,
+)
+from .sql_helpers import (
+    sql_cleanup,
+    sql_where_in,
+)
+from .sql_names import (
+    COLLECTION_TABLE,
+    COLLECTION_LINK_TABLE,
+    ELEMENT_TABLE,
+    ELEMENT_BLOCKING_CACHE_TABLE,
+)
+from ..common import trim
+from ..extras import LazyValue
+
+CUSTOM_TABLE_DEFINITIONS: Mapping[SafeStr, LazyValue[str]] = {
+    SafeStr(table_name): lambda: trim(table_sql())
+    for table_name, table_sql in {
+        ELEMENT_BLOCKING_CACHE_TABLE: lambda: f"""
+            CREATE TABLE {ELEMENT_BLOCKING_CACHE_TABLE}(
+                collection INT(11) NOT NULL,
+                element1 INT(11) NOT NULL,
+                element2 INT(11) NOT NULL
+            ) SELECT
+                c.id AS collection,
+                l1.element AS element1,
+                l2.element AS element2
+            FROM
+                {COLLECTION_TABLE} c
+            INNER JOIN {COLLECTION_LINK_TABLE} l1 ON
+                c.id = l1.collection
+            INNER JOIN {COLLECTION_LINK_TABLE} l2 ON
+                c.id = l2.collection
+            INNER JOIN {ELEMENT_TABLE} e1 ON
+                l1.element = e1.id
+            INNER JOIN {ELEMENT_TABLE} e2 ON
+                l2.element = e2.id
+            WHERE
+                (
+                    l1.season,
+                    l1.episode,
+                    e1.release_date,
+                    e1.id
+                ) < (
+                    l2.season,
+                    l2.episode,
+                    e2.release_date,
+                    e2.id
+                ) AND c.watch_in_order
+            GROUP BY
+                collection,
+                element1,
+                element2;
+            ALTER TABLE
+                `{ELEMENT_BLOCKING_CACHE_TABLE}` ADD PRIMARY KEY(`element1`, `element2`, `collection`);
+            ALTER TABLE
+                `{ELEMENT_BLOCKING_CACHE_TABLE}` ADD INDEX(`collection`);
+        """,
+    }.items()
+}
+
+
+def table_exists(table_name: SafeStr) -> bool:
+    return db.provider.table_exists(
+        connection=db.get_connection(),
+        table_name=table_name,
+    )
+
+
+@orm.db_session
+def setup_custom_tables() -> None:
+    """
+    Creates & fills custom tables (especially cache tables) if they do not exist.
+    This should not destroy already existing data and should behave idempotently.
+    """
+    for table_name, table_sql in CUSTOM_TABLE_DEFINITIONS.items():
+        if not table_exists(table_name):
+            db.execute(table_sql())
+
+
+def update_element_lookup_cache(collection_ids: List[int] = []):
+    logging.info(
+        f"Rebuild {ELEMENT_BLOCKING_CACHE_TABLE} for {len(collection_ids) if collection_ids else 'all'} collections …"
+    )
+
+    def filter_clause(c_id: str):
+        return sql_where_in(c_id, collection_ids) if collection_ids else "true"
+
+    orm.flush()
+    sql = [
+        f"""
+            DELETE QUICK FROM {ELEMENT_BLOCKING_CACHE_TABLE}
+            WHERE {filter_clause("collection")};
+        """,
+        f"""
+            INSERT INTO {ELEMENT_BLOCKING_CACHE_TABLE} (collection, element1, element2) SELECT
+                c.id AS collection,
+                l1.element AS element1,
+                l2.element AS element2
+            FROM
+                {COLLECTION_TABLE} c
+            INNER JOIN {COLLECTION_LINK_TABLE} l1 ON
+                c.id = l1.collection
+            INNER JOIN {COLLECTION_LINK_TABLE} l2 ON
+                c.id = l2.collection
+            INNER JOIN {ELEMENT_TABLE} e1 ON
+                l1.element = e1.id
+            INNER JOIN {ELEMENT_TABLE} e2 ON
+                l2.element = e2.id
+            WHERE
+                (
+                    l1.season,
+                    l1.episode,
+                    e1.release_date,
+                    e1.id
+                ) < (
+                    l2.season,
+                    l2.episode,
+                    e2.release_date,
+                    e2.id
+                ) AND c.watch_in_order
+                AND {filter_clause("c.id")}
+            GROUP BY
+                collection,
+                element1,
+                element2
+        """,
+    ]
+    for q in sql:
+        db.execute(sql_cleanup(q))
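
Note: a minimal startup sketch for the two entry points above, assuming a MariaDB/MySQL binding (which the INT(11)/backtick dialect suggests) and a `models` package name; the bind() credentials are placeholders:

    from pony import orm
    from models import db, setup_custom_tables, update_element_lookup_cache  # hypothetical import path

    db.bind(provider="mysql", host="localhost", user="app", passwd="secret", db="app")  # placeholder credentials
    db.generate_mapping(create_tables=True)
    setup_custom_tables()  # creates element_lookup_cache only if it does not exist yet
    with orm.db_session:
        update_element_lookup_cache()  # rebuilds for all collections; pass ids to limit the refresh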

entities.py
@@ -3,16 +3,11 @@ from __future__ import annotations
 from abc import abstractproperty
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-from functools import cache
-import itertools
 import logging
-import re
 from typing import (
     Iterable,
     List,
-    Mapping,
     Optional,
-    Sequence,
     Set,
     Tuple,
     TypeVar,
@@ -22,11 +17,9 @@ import magic
 import requests
 from pony import orm

-from .custom_types import Query, SafeStr
+from .custom_types import Query
 from .thumbnails import THUMBNAIL_ALLOWED_TYPES, THUMBNAIL_HEADERS
 from .extras import UriHolder
-from ..common import trim
-from ..extras import LazyValue
 from ..preferences.tag_protocol import TagableProto, TagProto

 db = orm.Database()
@@ -493,6 +486,9 @@ class MediaElement(db.Entity, UriHolder, Tagable):
     def can_considered(self) -> bool:
         DIRECT_SQL = True
         if DIRECT_SQL:
+            # import here because of circular dependency
+            from .sql_speedups import is_considered
+
             return is_considered(self.id)
         if self.skip_over:
             return False
@@ -928,266 +924,3 @@ class CollectionUriMapping(db.Entity):
     id: int = orm.PrimaryKey(int, auto=True)
     uri: str = orm.Required(str, unique=True)
     element: MediaCollection = orm.Required(MediaCollection)
-
-
-MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE = "element_lookup_cache"
-
-SQL_WHITESPACE_PATTERN = re.compile(r"(\s|\n)+")
-
-
-def sql_cleanup(sql: str) -> str:
-    return SQL_WHITESPACE_PATTERN.sub(" ", sql).strip()
-
-
-def sql_where_in(id: str, id_list: Iterable[str | int]) -> str:
-    return f"{id} IN ({','.join(str(i) for i in id_list)})"
-
-
-# TODO reducing cache table to only contain videos not watched/ignored (not huge speedup)
-# TODO add bool for (not)? blocking to direct dependencies (similar to above) (not huge speedup)
-def sql_is_considered(elem_id: str, use_cache: bool = True) -> str:
-    # NOT EXISTS seems worlds better than making an OUTER JOIN
-    return sql_cleanup(
-        f"""
-        NOT EXISTS (
-    """
-        + (
-            f"""
-            SELECT c.element2
-            FROM {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} c
-            INNER JOIN {MediaElement._table_} m2 ON c.element1 = m2.id
-            WHERE c.element2 = {elem_id} AND NOT (m2.watched OR m2.ignored)
-        """
-            if use_cache
-            else f"""
-            SELECT *
-            FROM {MediaElement._table_} look_elem
-            INNER JOIN {MediaCollectionLink._table_} link ON link.element = look_elem.id
-            INNER JOIN {MediaCollection._table_} coll ON coll.id = link.collection
-            INNER JOIN {MediaCollectionLink._table_} coll_link ON coll_link.collection = coll.id
-            INNER JOIN {MediaElement._table_} coll_elem ON coll_elem.id = coll_link.element
-            WHERE look_elem.id = {elem_id}
-                AND coll.watch_in_order
-                AND NOT (coll_elem.watched OR coll_elem.ignored)
-                AND (coll_link.season, coll_link.episode, coll_elem.release_date, coll_elem.id) < (link.season, link.episode, look_elem.release_date, look_elem.id)
-        """
-        )
-        + f"""
-        ) AND NOT EXISTS (
-            SELECT *
-            FROM mediaelement_mediaelement m_m
-            INNER JOIN {MediaElement._table_} m ON m_m.mediaelement = m.id
-            WHERE m_m.mediaelement_2 = {elem_id} AND NOT (m.watched OR m.ignored)
-        )
-    """
-    )
-
-
-def is_considered(elem_id: int) -> bool:
-    return db.exists(
-        sql_cleanup(
-            f"""
-            SELECT elem.id
-            FROM {MediaElement._table_} elem
-            WHERE elem.id = {elem_id}
-                AND NOT (elem.watched OR elem.ignored)
-                AND elem.release_date <= NOW()
-                AND ({sql_is_considered('elem.id')})
-        """
-        )
-    )
-
-
-def are_multiple_considered(elem_ids: Iterable[int]) -> Mapping[int, bool]:
-    res = {
-        r[0]
-        for r in db.execute(
-            sql_cleanup(
-                f"""
-                SELECT elem.id
-                FROM {MediaElement._table_} elem
-                WHERE NOT (elem.watched OR elem.ignored)
-                    AND elem.release_date <= NOW()
-                    AND ({sql_is_considered("elem.id")})
-            """
-            )
-        )
-    }
-    return {elem_id: elem_id in res for elem_id in elem_ids}
-
-
-def get_all_considered(
-    order_by: str = "NULL", filter_by: str = "true"
-) -> List[MediaElement]:
-    return MediaElement.select_by_sql(
-        sql_cleanup(
-            f"""
-            SELECT elem.*
-            FROM {MediaElement._table_} elem
-            WHERE NOT (elem.watched OR elem.ignored)
-                AND elem.release_date <= NOW()
-                AND {filter_by}
-                AND ({sql_is_considered("elem.id")})
-            ORDER BY {order_by}
-        """
-        )
-    )
-
-
-def get_all_elements_tags_recursive() -> Mapping[int, Sequence[Tag]]:
-    elem_tag_query: Iterable[Tuple[int, int]] = db.execute(
-        """
-        WITH RECURSIVE found_tag (mediaelement_id, tag_id) AS
-        (
-            SELECT mediaelement_tag.mediaelement, mediaelement_tag.tag
-            FROM mediaelement_tag
-            UNION
-            SELECT mediacollectionlink.element, mediacollection_tag.tag
-            FROM mediacollectionlink
-            JOIN mediacollection_tag ON mediacollectionlink.collection = mediacollection_tag.mediacollection
-            UNION
-            SELECT found_tag.mediaelement_id, tag_tag.tag_2
-            FROM found_tag
-            JOIN tag_tag ON found_tag.tag_id = tag_tag.tag
-        )
-        SELECT found_tag.mediaelement_id, found_tag.tag_id
-        FROM found_tag
-        JOIN tag ON found_tag.tag_id = tag.id
-        WHERE tag.use_for_preferences
-        ORDER BY mediaelement_id, tag_id;
-        """
-    )
-
-    @cache
-    def get_tag(tag_id: int) -> Tag:
-        return Tag[tag_id]
-
-    return {
-        elem_id: [get_tag(tag_id) for _, tag_id in group_iter]
-        for elem_id, group_iter in itertools.groupby(
-            elem_tag_query, key=lambda row: row[0]
-        )
-    }
-
-
-def update_element_lookup_cache(collection_ids: List[int] = []):
-    logging.info(
-        f"Rebuild {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} for {len(collection_ids) if collection_ids else 'all'} collections …"
-    )
-
-    def filter_clause(c_id: str):
-        return sql_where_in(c_id, collection_ids) if collection_ids else "true"
-
-    orm.flush()
-    sql = [
-        f"""
-            DELETE QUICK FROM {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}
-            WHERE {filter_clause("collection")};
-        """,
-        f"""
-            INSERT INTO {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} (collection, element1, element2) SELECT
-                c.id AS collection,
-                l1.element AS element1,
-                l2.element AS element2
-            FROM
-                {MediaCollection._table_} c
-            INNER JOIN {MediaCollectionLink._table_} l1 ON
-                c.id = l1.collection
-            INNER JOIN {MediaCollectionLink._table_} l2 ON
-                c.id = l2.collection
-            INNER JOIN {MediaElement._table_} e1 ON
-                l1.element = e1.id
-            INNER JOIN {MediaElement._table_} e2 ON
-                l2.element = e2.id
-            WHERE
-                (
-                    l1.season,
-                    l1.episode,
-                    e1.release_date,
-                    e1.id
-                ) < (
-                    l2.season,
-                    l2.episode,
-                    e2.release_date,
-                    e2.id
-                ) AND c.watch_in_order
-                AND {filter_clause("c.id")}
-            GROUP BY
-                collection,
-                element1,
-                element2
-        """,
-    ]
-    for q in sql:
-        db.execute(sql_cleanup(q))
-
-
-####
-## Custom Table Framework
-####
-
-CUSTOM_TABLE_DEFINITIONS: Mapping[SafeStr, LazyValue[str]] = {
-    SafeStr(table_name): lambda: trim(table_sql())
-    for table_name, table_sql in {
-        MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE: lambda: f"""
-            CREATE TABLE {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}(
-                collection INT(11) NOT NULL,
-                element1 INT(11) NOT NULL,
-                element2 INT(11) NOT NULL
-            ) SELECT
-                c.id AS collection,
-                l1.element AS element1,
-                l2.element AS element2
-            FROM
-                {MediaCollection._table_} c
-            INNER JOIN {MediaCollectionLink._table_} l1 ON
-                c.id = l1.collection
-            INNER JOIN {MediaCollectionLink._table_} l2 ON
-                c.id = l2.collection
-            INNER JOIN {MediaElement._table_} e1 ON
-                l1.element = e1.id
-            INNER JOIN {MediaElement._table_} e2 ON
-                l2.element = e2.id
-            WHERE
-                (
-                    l1.season,
-                    l1.episode,
-                    e1.release_date,
-                    e1.id
-                ) < (
-                    l2.season,
-                    l2.episode,
-                    e2.release_date,
-                    e2.id
-                ) AND c.watch_in_order
-            GROUP BY
-                collection,
-                element1,
-                element2;
-            ALTER TABLE
-                `{MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}` ADD PRIMARY KEY(`element1`, `element2`, `collection`);
-            ALTER TABLE
-                `{MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}` ADD INDEX(`collection`);
-        """,
-    }.items()
-}
-
-
-def table_exists(table_name: SafeStr) -> bool:
-    return db.provider.table_exists(
-        connection=db.get_connection(),
-        table_name=table_name,
-    )
-
-
-@orm.db_session
-def setup_custom_tables() -> None:
-    """
-    Creates & fills custom tables (especially cache tables) if they do not exist.
-    This should not destroy already existing data and should behave idempotently.
-    """
-    for table_name, table_sql in CUSTOM_TABLE_DEFINITIONS.items():
-        if not table_exists(table_name):
-            db.execute(table_sql())

sql_helpers.py (new file)
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import re
+from typing import (
+    Iterable,
+)
+
+SQL_WHITESPACE_PATTERN = re.compile(r"(\s|\n)+")
+
+
+def sql_cleanup(sql: str) -> str:
+    return SQL_WHITESPACE_PATTERN.sub(" ", sql).strip()
+
+
+def sql_where_in(id: str, id_list: Iterable[str | int]) -> str:
+    return f"{id} IN ({','.join(str(i) for i in id_list)})"

sql_names.py (new file)
@@ -0,0 +1,14 @@
+COLLECTION_URI_MAPPING_TABLE = "collectionurimapping"
+COLLECTION_TABLE = "mediacollection"
+COLLECTION_LINK_TABLE = "mediacollectionlink"
+COLLECTION_TAG_MAPPING_TABLE = "mediacollection_tag"
+ELEMENT_TABLE = "mediaelement"
+ELEMENT_BLOCKING_CACHE_TABLE = "element_lookup_cache"
+ELEMENT_BLOCKING_MAPPING_TABLE = "mediaelement_mediaelement"
+ELEMENT_TAG_MAPPING_TABLE = "mediaelement_tag"
+ELEMENT_URI_MAPPING_TABLE = "mediaurimapping"
+TAG_TABLE = "tag"
+TAG_HIERACHY_TABLE = "tag_tag"
+TAG_KEY_MAPPING_TABLE = "tagkey"
+THUMBNAIL_TABLE = "mediathumbnail"
+THUMBNAIL_CACHE_TABLE = "mediathumbnailcache"

sql_queries.py (new file)
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from .entities import (
+    MediaCollection,
+    MediaCollectionLink,
+    MediaElement,
+)
+from .sql_helpers import (
+    sql_cleanup,
+)
+from .sql_names import (
+    ELEMENT_BLOCKING_CACHE_TABLE,
+)
+
+
+# TODO reducing cache table to only contain videos not watched/ignored (not huge speedup)
+# TODO add bool for (not)? blocking to direct dependencies (similar to above) (not huge speedup)
+def sql_is_considered(elem_id: str, use_cache: bool = True) -> str:
+    # NOT EXISTS seems worlds better than making an OUTER JOIN
+    return sql_cleanup(
+        f"""
+        NOT EXISTS (
+    """
+        + (
+            f"""
+            SELECT c.element2
+            FROM {ELEMENT_BLOCKING_CACHE_TABLE} c
+            INNER JOIN {MediaElement._table_} m2 ON c.element1 = m2.id
+            WHERE c.element2 = {elem_id} AND NOT (m2.watched OR m2.ignored)
+        """
+            if use_cache
+            else f"""
+            SELECT *
+            FROM {MediaElement._table_} look_elem
+            INNER JOIN {MediaCollectionLink._table_} link ON link.element = look_elem.id
+            INNER JOIN {MediaCollection._table_} coll ON coll.id = link.collection
+            INNER JOIN {MediaCollectionLink._table_} coll_link ON coll_link.collection = coll.id
+            INNER JOIN {MediaElement._table_} coll_elem ON coll_elem.id = coll_link.element
+            WHERE look_elem.id = {elem_id}
+                AND coll.watch_in_order
+                AND NOT (coll_elem.watched OR coll_elem.ignored)
+                AND (coll_link.season, coll_link.episode, coll_elem.release_date, coll_elem.id) < (link.season, link.episode, look_elem.release_date, look_elem.id)
+        """
+        )
+        + f"""
+        ) AND NOT EXISTS (
+            SELECT *
+            FROM mediaelement_mediaelement m_m
+            INNER JOIN {MediaElement._table_} m ON m_m.mediaelement = m.id
+            WHERE m_m.mediaelement_2 = {elem_id} AND NOT (m.watched OR m.ignored)
+        )
+    """
+    )
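
Note: sql_is_considered() returns a reusable WHERE-clause fragment, not a complete statement; callers splice it into their own queries, as sql_speedups does below. Roughly:

    predicate = sql_is_considered("elem.id")  # "NOT EXISTS ( ... ) AND NOT EXISTS ( ... )", whitespace collapsed
    query = f"SELECT elem.id FROM mediaelement elem WHERE ({predicate})"  # illustrative only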

sql_speedups.py (new file)
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from functools import cache
+import itertools
+from typing import (
+    Iterable,
+    List,
+    Mapping,
+    Sequence,
+    Tuple,
+)
+
+from .entities import (
+    MediaElement,
+    Tag,
+    db,
+)
+from .sql_helpers import (
+    sql_cleanup,
+)
+from .sql_queries import (
+    sql_is_considered,
+)
+
+
+def is_considered(elem_id: int) -> bool:
+    return db.exists(
+        sql_cleanup(
+            f"""
+            SELECT elem.id
+            FROM {MediaElement._table_} elem
+            WHERE elem.id = {elem_id}
+                AND NOT (elem.watched OR elem.ignored)
+                AND elem.release_date <= NOW()
+                AND ({sql_is_considered('elem.id')})
+        """
+        )
+    )
+
+
+def are_multiple_considered(elem_ids: Iterable[int]) -> Mapping[int, bool]:
+    res = {
+        r[0]
+        for r in db.execute(
+            sql_cleanup(
+                f"""
+                SELECT elem.id
+                FROM {MediaElement._table_} elem
+                WHERE NOT (elem.watched OR elem.ignored)
+                    AND elem.release_date <= NOW()
+                    AND ({sql_is_considered("elem.id")})
+            """
+            )
+        )
+    }
+    return {elem_id: elem_id in res for elem_id in elem_ids}
+
+
+def get_all_considered(
+    order_by: str = "NULL",
+    filter_by: str = "true",
+) -> List[MediaElement]:
+    return MediaElement.select_by_sql(
+        sql_cleanup(
+            f"""
+            SELECT elem.*
+            FROM {MediaElement._table_} elem
+            WHERE NOT (elem.watched OR elem.ignored)
+                AND elem.release_date <= NOW()
+                AND {filter_by}
+                AND ({sql_is_considered("elem.id")})
+            ORDER BY {order_by}
+        """
+        )
+    )
+
+
+def get_all_elements_tags_recursive() -> Mapping[int, Sequence[Tag]]:
+    elem_tag_query: Iterable[Tuple[int, int]] = db.execute(
+        """
+        WITH RECURSIVE found_tag (mediaelement_id, tag_id) AS
+        (
+            SELECT mediaelement_tag.mediaelement, mediaelement_tag.tag
+            FROM mediaelement_tag
+            UNION
+            SELECT mediacollectionlink.element, mediacollection_tag.tag
+            FROM mediacollectionlink
+            JOIN mediacollection_tag ON mediacollectionlink.collection = mediacollection_tag.mediacollection
+            UNION
+            SELECT found_tag.mediaelement_id, tag_tag.tag_2
+            FROM found_tag
+            JOIN tag_tag ON found_tag.tag_id = tag_tag.tag
+        )
+        SELECT found_tag.mediaelement_id, found_tag.tag_id
+        FROM found_tag
+        JOIN tag ON found_tag.tag_id = tag.id
+        WHERE tag.use_for_preferences
+        ORDER BY mediaelement_id, tag_id;
+        """
+    )
+
+    @cache
+    def get_tag(tag_id: int) -> Tag:
+        return Tag[tag_id]
+
+    return {
+        elem_id: [get_tag(tag_id) for _, tag_id in group_iter]
+        for elem_id, group_iter in itertools.groupby(
+            elem_tag_query, key=lambda row: row[0]
+        )
+    }
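
Note: a rough usage sketch for these speedups inside a db_session; the `models` import path is an assumption, the semantics follow from the queries above:

    from pony import orm
    from models import are_multiple_considered, get_all_considered  # hypothetical import path

    with orm.db_session:
        ready = get_all_considered(order_by="elem.release_date")  # unwatched, released elements with no unwatched blocker
        flags = are_multiple_considered([e.id for e in ready])    # Mapping[int, bool]; True for every id returned above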