Split models/entities into multiple files

master
Felix Stupp 1 year ago
parent a3ea1548aa
commit 44a630c074
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

__init__.py
@@ -1,3 +1,8 @@
from .cache_tables import (
setup_custom_tables,
update_element_lookup_cache,
)
from .custom_types import (
Query,
SafeStr,
@@ -15,12 +20,7 @@ from .entities import (
Tag,
Tagable,
TagKey,
are_multiple_considered,
db,
get_all_considered,
get_all_elements_tags_recursive,
setup_custom_tables,
update_element_lookup_cache,
)
from .predefined_tags import (
@@ -29,6 +29,12 @@ from .predefined_tags import (
predefined_video_tag,
)
from .sql_speedups import (
are_multiple_considered,
get_all_considered,
get_all_elements_tags_recursive,
)
from .thumbnails import (
thumbnail_sort_key,
)
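
For orientation, a minimal usage sketch of the re-exported API after the split; the package's public surface is unchanged. The import path "models" is an assumption (not part of this commit), and the database is assumed to be bound and mapped elsewhere:

from pony import orm

from models import (  # assumed import path for this package
    get_all_considered,
    setup_custom_tables,
    update_element_lookup_cache,
)

setup_custom_tables()  # creates element_lookup_cache on first run; idempotent
with orm.db_session:
    update_element_lookup_cache()  # rebuild blocking pairs for all collections
    for elem in get_all_considered(order_by="elem.release_date"):
        print(elem.id)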

cache_tables.py
@@ -0,0 +1,143 @@
from __future__ import annotations
import logging
from typing import (
List,
Mapping,
)
from pony import orm
from .custom_types import SafeStr
from .entities import (
db,
)
from .sql_helpers import (
sql_cleanup,
sql_where_in,
)
from .sql_names import (
COLLECTION_TABLE,
COLLECTION_LINK_TABLE,
ELEMENT_TABLE,
ELEMENT_BLOCKING_CACHE_TABLE,
)
from ..common import trim
from ..extras import LazyValue
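# Each custom table name maps to a lazily-built CREATE statement.
# element_lookup_cache materializes, per watch_in_order collection, every
# ordered pair (element1, element2) where element1 sorts strictly before
# element2 by (season, episode, release_date, id) and may therefore block it.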
CUSTOM_TABLE_DEFINITIONS: Mapping[SafeStr, LazyValue[str]] = {
# bind table_sql via a default argument: a bare closure here would
# late-bind and every entry would end up using the last table's SQL
SafeStr(table_name): lambda table_sql=table_sql: trim(table_sql())
for table_name, table_sql in {
ELEMENT_BLOCKING_CACHE_TABLE: lambda: f"""
CREATE TABLE {ELEMENT_BLOCKING_CACHE_TABLE}(
collection INT(11) NOT NULL,
element1 INT(11) NOT NULL,
element2 INT(11) NOT NULL
) SELECT
c.id AS collection,
l1.element AS element1,
l2.element AS element2
FROM
{COLLECTION_TABLE} c
INNER JOIN {COLLECTION_LINK_TABLE} l1 ON
c.id = l1.collection
INNER JOIN {COLLECTION_LINK_TABLE} l2 ON
c.id = l2.collection
INNER JOIN {ELEMENT_TABLE} e1 ON
l1.element = e1.id
INNER JOIN {ELEMENT_TABLE} e2 ON
l2.element = e2.id
WHERE
(
l1.season,
l1.episode,
e1.release_date,
e1.id
) < (
l2.season,
l2.episode,
e2.release_date,
e2.id
) AND c.watch_in_order
GROUP BY
collection,
element1,
element2;
ALTER TABLE
`{ELEMENT_BLOCKING_CACHE_TABLE}` ADD PRIMARY KEY(`element1`, `element2`, `collection`);
ALTER TABLE
`{ELEMENT_BLOCKING_CACHE_TABLE}` ADD INDEX(`collection`);
""",
}.items()
}
def table_exists(table_name: SafeStr) -> bool:
return db.provider.table_exists(
connection=db.get_connection(),
table_name=table_name,
)
@orm.db_session
def setup_custom_tables() -> None:
"""
Creates & fills custom tables (especially cache tables) if they do not exist.
This should not destroy already existing data and should behave idempotently.
"""
for table_name, table_sql in CUSTOM_TABLE_DEFINITIONS.items():
if not table_exists(table_name):
db.execute(table_sql())
def update_element_lookup_cache(collection_ids: List[int] = []):
logging.info(
f"Rebuild {ELEMENT_BLOCKING_CACHE_TABLE} for {len(collection_ids) if collection_ids else 'all'} collections …"
)
def filter_clause(c_id: str):
return sql_where_in(c_id, collection_ids) if collection_ids else "true"
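# Flush pending ORM writes so the raw SQL below operates on current
# data, then rebuild the affected rows via DELETE + INSERT … SELECT.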
orm.flush()
sql = [
f"""
DELETE QUICK FROM {ELEMENT_BLOCKING_CACHE_TABLE}
WHERE {filter_clause("collection")};
""",
f"""
INSERT INTO {ELEMENT_BLOCKING_CACHE_TABLE} (collection, element1, element2) SELECT
c.id AS collection,
l1.element AS element1,
l2.element AS element2
FROM
{COLLECTION_TABLE} c
INNER JOIN {COLLECTION_LINK_TABLE} l1 ON
c.id = l1.collection
INNER JOIN {COLLECTION_LINK_TABLE} l2 ON
c.id = l2.collection
INNER JOIN {ELEMENT_TABLE} e1 ON
l1.element = e1.id
INNER JOIN {ELEMENT_TABLE} e2 ON
l2.element = e2.id
WHERE
(
l1.season,
l1.episode,
e1.release_date,
e1.id
) < (
l2.season,
l2.episode,
e2.release_date,
e2.id
) AND c.watch_in_order
AND {filter_clause("c.id")}
GROUP BY
collection,
element1,
element2
""",
]
for q in sql:
db.execute(sql_cleanup(q))

entities.py
@@ -3,16 +3,11 @@ from __future__ import annotations
from abc import abstractproperty
from dataclasses import dataclass
from datetime import datetime, timedelta
from functools import cache
import itertools
import logging
import re
from typing import (
Iterable,
List,
Mapping,
Optional,
Sequence,
Set,
Tuple,
TypeVar,
@@ -22,11 +17,9 @@ import magic
import requests
from pony import orm
from .custom_types import Query, SafeStr
from .custom_types import Query
from .thumbnails import THUMBNAIL_ALLOWED_TYPES, THUMBNAIL_HEADERS
from .extras import UriHolder
from ..common import trim
from ..extras import LazyValue
from ..preferences.tag_protocol import TagableProto, TagProto
db = orm.Database()
@@ -493,6 +486,9 @@ class MediaElement(db.Entity, UriHolder, Tagable):
def can_considered(self) -> bool:
DIRECT_SQL = True
if DIRECT_SQL:
# imported here to avoid a circular dependency
from .sql_speedups import is_considered
return is_considered(self.id)
if self.skip_over:
return False
@@ -928,266 +924,3 @@ class CollectionUriMapping(db.Entity):
id: int = orm.PrimaryKey(int, auto=True)
uri: str = orm.Required(str, unique=True)
element: MediaCollection = orm.Required(MediaCollection)
MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE = "element_lookup_cache"
SQL_WHITESPACE_PATTERN = re.compile(r"(\s|\n)+")
def sql_cleanup(sql: str) -> str:
return SQL_WHITESPACE_PATTERN.sub(" ", sql).strip()
def sql_where_in(id: str, id_list: Iterable[str | int]) -> str:
return f"{id} IN ({','.join(str(i) for i in id_list)})"
# TODO reducing cache table to only contain videos not watched/ignored (not huge speedup)
# TODO add bool for (not)? blocking to direct dependencies (similar to above) (not huge speedup)
def sql_is_considered(elem_id: str, use_cache: bool = True) -> str:
# NOT EXISTS seems worlds better than an OUTER JOIN
return sql_cleanup(
f"""
NOT EXISTS (
"""
+ (
f"""
SELECT c.element2
FROM {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} c
INNER JOIN {MediaElement._table_} m2 ON c.element1 = m2.id
WHERE c.element2 = {elem_id} AND NOT (m2.watched OR m2.ignored)
"""
if use_cache
else f"""
SELECT *
FROM {MediaElement._table_} look_elem
INNER JOIN {MediaCollectionLink._table_} link ON link.element = look_elem.id
INNER JOIN {MediaCollection._table_} coll ON coll.id = link.collection
INNER JOIN {MediaCollectionLink._table_} coll_link ON coll_link.collection = coll.id
INNER JOIN {MediaElement._table_} coll_elem ON coll_elem.id = coll_link.element
WHERE look_elem.id = {elem_id}
AND coll.watch_in_order
AND NOT (coll_elem.watched OR coll_elem.ignored)
AND (coll_link.season, coll_link.episode, coll_elem.release_date, coll_elem.id) < (link.season, link.episode, look_elem.release_date, look_elem.id)
"""
)
+ f"""
) AND NOT EXISTS (
SELECT *
FROM mediaelement_mediaelement m_m
INNER JOIN {MediaElement._table_} m ON m_m.mediaelement = m.id
WHERE m_m.mediaelement_2 = {elem_id} AND NOT (m.watched OR m.ignored)
)
"""
)
def is_considered(elem_id: int) -> bool:
return db.exists(
sql_cleanup(
f"""
SELECT elem.id
FROM {MediaElement._table_} elem
WHERE elem.id = {elem_id}
AND NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND ({sql_is_considered('elem.id')})
"""
)
)
def are_multiple_considered(elem_ids: Iterable[int]) -> Mapping[int, bool]:
res = {
r[0]
for r in db.execute(
sql_cleanup(
f"""
SELECT elem.id
FROM {MediaElement._table_} elem
WHERE NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND ({sql_is_considered("elem.id")})
"""
)
)
}
return {elem_id: elem_id in res for elem_id in elem_ids}
def get_all_considered(
order_by: str = "NULL", filter_by: str = "true"
) -> List[MediaElement]:
return MediaElement.select_by_sql(
sql_cleanup(
f"""
SELECT elem.*
FROM {MediaElement._table_} elem
WHERE NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND {filter_by}
AND ({sql_is_considered("elem.id")})
ORDER BY {order_by}
"""
)
)
def get_all_elements_tags_recursive() -> Mapping[int, Sequence[Tag]]:
elem_tag_query: Iterable[Tuple[int, int]] = db.execute(
"""
WITH RECURSIVE found_tag (mediaelement_id, tag_id) AS
(
SELECT mediaelement_tag.mediaelement, mediaelement_tag.tag
FROM mediaelement_tag
UNION
SELECT mediacollectionlink.element, mediacollection_tag.tag
FROM mediacollectionlink
JOIN mediacollection_tag ON mediacollectionlink.collection = mediacollection_tag.mediacollection
UNION
SELECT found_tag.mediaelement_id, tag_tag.tag_2
FROM found_tag
JOIN tag_tag ON found_tag.tag_id = tag_tag.tag
)
SELECT found_tag.mediaelement_id, found_tag.tag_id
FROM found_tag
JOIN tag ON found_tag.tag_id = tag.id
WHERE tag.use_for_preferences
ORDER BY mediaelement_id, tag_id;
"""
)
@cache
def get_tag(tag_id: int) -> Tag:
return Tag[tag_id]
return {
elem_id: [get_tag(tag_id) for _, tag_id in group_iter]
for elem_id, group_iter in itertools.groupby(
elem_tag_query, key=lambda row: row[0]
)
}
def update_element_lookup_cache(collection_ids: List[int] = []):
logging.info(
f"Rebuild {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} for {len(collection_ids) if collection_ids else 'all'} collections …"
)
def filter_clause(c_id: str):
return sql_where_in(c_id, collection_ids) if collection_ids else "true"
orm.flush()
sql = [
f"""
DELETE QUICK FROM {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}
WHERE {filter_clause("collection")};
""",
f"""
INSERT INTO {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE} (collection, element1, element2) SELECT
c.id AS collection,
l1.element AS element1,
l2.element AS element2
FROM
{MediaCollection._table_} c
INNER JOIN {MediaCollectionLink._table_} l1 ON
c.id = l1.collection
INNER JOIN {MediaCollectionLink._table_} l2 ON
c.id = l2.collection
INNER JOIN {MediaElement._table_} e1 ON
l1.element = e1.id
INNER JOIN {MediaElement._table_} e2 ON
l2.element = e2.id
WHERE
(
l1.season,
l1.episode,
e1.release_date,
e1.id
) < (
l2.season,
l2.episode,
e2.release_date,
e2.id
) AND c.watch_in_order
AND {filter_clause("c.id")}
GROUP BY
collection,
element1,
element2
""",
]
for q in sql:
db.execute(sql_cleanup(q))
####
## Custom Table Framework
####
CUSTOM_TABLE_DEFINITIONS: Mapping[SafeStr, LazyValue[str]] = {
SafeStr(table_name): lambda: trim(table_sql())
for table_name, table_sql in {
MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE: lambda: f"""
CREATE TABLE {MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}(
collection INT(11) NOT NULL,
element1 INT(11) NOT NULL,
element2 INT(11) NOT NULL
) SELECT
c.id AS collection,
l1.element AS element1,
l2.element AS element2
FROM
{MediaCollection._table_} c
INNER JOIN {MediaCollectionLink._table_} l1 ON
c.id = l1.collection
INNER JOIN {MediaCollectionLink._table_} l2 ON
c.id = l2.collection
INNER JOIN {MediaElement._table_} e1 ON
l1.element = e1.id
INNER JOIN {MediaElement._table_} e2 ON
l2.element = e2.id
WHERE
(
l1.season,
l1.episode,
e1.release_date,
e1.id
) < (
l2.season,
l2.episode,
e2.release_date,
e2.id
) AND c.watch_in_order
GROUP BY
collection,
element1,
element2;
ALTER TABLE
`{MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}` ADD PRIMARY KEY(`element1`, `element2`, `collection`);
ALTER TABLE
`{MEDIAELEMENT_BLOCKING_LOOKUP_CACHE_TABLE}` ADD INDEX(`collection`);
""",
}.items()
}
def table_exists(table_name: SafeStr) -> bool:
return db.provider.table_exists(
connection=db.get_connection(),
table_name=table_name,
)
@orm.db_session
def setup_custom_tables() -> None:
"""
Creates & fills custom tables (especially cache tables) if they do not exist.
This should not destroy already existing data and should behave idempotently.
"""
for table_name, table_sql in CUSTOM_TABLE_DEFINITIONS.items():
if not table_exists(table_name):
db.execute(table_sql())

sql_helpers.py
@@ -0,0 +1,17 @@
from __future__ import annotations
import re
from typing import (
Iterable,
)
SQL_WHITESPACE_PATTERN = re.compile(r"(\s|\n)+")
def sql_cleanup(sql: str) -> str:
return SQL_WHITESPACE_PATTERN.sub(" ", sql).strip()
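# Renders a SQL IN(...) clause by direct string interpolation; ids must be
# trusted values (internal integer ids), as nothing is escaped here.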
def sql_where_in(id: str, id_list: Iterable[str | int]) -> str:
return f"{id} IN ({','.join(str(i) for i in id_list)})"

sql_names.py
@@ -0,0 +1,14 @@
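# Raw table names matching what pony.orm generates for the entities in
# .entities; centralised so hand-written SQL stays in sync with the models.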
COLLECTION_URI_MAPPING_TABLE = "collectionurimapping"
COLLECTION_TABLE = "mediacollection"
COLLECTION_LINK_TABLE = "mediacollectionlink"
COLLECTION_TAG_MAPPING_TABLE = "mediacollection_tag"
ELEMENT_TABLE = "mediaelement"
ELEMENT_BLOCKING_CACHE_TABLE = "element_lookup_cache"
ELEMENT_BLOCKING_MAPPING_TABLE = "mediaelement_mediaelement"
ELEMENT_TAG_MAPPING_TABLE = "mediaelement_tag"
ELEMENT_URI_MAPPING_TABLE = "mediaurimapping"
TAG_TABLE = "tag"
TAG_HIERACHY_TABLE = "tag_tag"
TAG_KEY_MAPPING_TABLE = "tagkey"
THUMBNAIL_TABLE = "mediathumbnail"
THUMBNAIL_CACHE_TABLE = "mediathumbnailcache"

sql_queries.py
@@ -0,0 +1,53 @@
from __future__ import annotations
from .entities import (
MediaCollection,
MediaCollectionLink,
MediaElement,
)
from .sql_helpers import (
sql_cleanup,
)
from .sql_names import (
ELEMENT_BLOCKING_CACHE_TABLE,
)
# TODO reducing cache table to only contain videos not watched/ignored (not huge speedup)
# TODO add bool for (not)? blocking to direct dependencies (similar to above) (not huge speedup)
def sql_is_considered(elem_id: str, use_cache: bool = True) -> str:
# NOT EXISTS seems worlds better than an OUTER JOIN
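# Two checks must both come up empty for an element to be "considered":
# 1) no unwatched, unignored element precedes it in a watch_in_order
#    collection (answered from element_lookup_cache when use_cache is set,
#    otherwise computed live from the collection links), and
# 2) no direct mediaelement_mediaelement dependency is still unwatched.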
return sql_cleanup(
f"""
NOT EXISTS (
"""
+ (
f"""
SELECT c.element2
FROM {ELEMENT_BLOCKING_CACHE_TABLE} c
INNER JOIN {MediaElement._table_} m2 ON c.element1 = m2.id
WHERE c.element2 = {elem_id} AND NOT (m2.watched OR m2.ignored)
"""
if use_cache
else f"""
SELECT *
FROM {MediaElement._table_} look_elem
INNER JOIN {MediaCollectionLink._table_} link ON link.element = look_elem.id
INNER JOIN {MediaCollection._table_} coll ON coll.id = link.collection
INNER JOIN {MediaCollectionLink._table_} coll_link ON coll_link.collection = coll.id
INNER JOIN {MediaElement._table_} coll_elem ON coll_elem.id = coll_link.element
WHERE look_elem.id = {elem_id}
AND coll.watch_in_order
AND NOT (coll_elem.watched OR coll_elem.ignored)
AND (coll_link.season, coll_link.episode, coll_elem.release_date, coll_elem.id) < (link.season, link.episode, look_elem.release_date, look_elem.id)
"""
)
+ f"""
) AND NOT EXISTS (
SELECT *
FROM mediaelement_mediaelement m_m
INNER JOIN {MediaElement._table_} m ON m_m.mediaelement = m.id
WHERE m_m.mediaelement_2 = {elem_id} AND NOT (m.watched OR m.ignored)
)
"""
)

sql_speedups.py
@@ -0,0 +1,111 @@
from __future__ import annotations
from functools import cache
import itertools
from typing import (
Iterable,
List,
Mapping,
Sequence,
Tuple,
)
from .entities import (
MediaElement,
Tag,
db,
)
from .sql_helpers import (
sql_cleanup,
)
from .sql_queries import (
sql_is_considered,
)
def is_considered(elem_id: int) -> bool:
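# True when the element exists, is released, is neither watched nor
# ignored, and no blocker remains per sql_is_considered.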
return db.exists(
sql_cleanup(
f"""
SELECT elem.id
FROM {MediaElement._table_} elem
WHERE elem.id = {elem_id}
AND NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND ({sql_is_considered('elem.id')})
"""
)
)
def are_multiple_considered(elem_ids: Iterable[int]) -> Mapping[int, bool]:
res = {
r[0]
for r in db.execute(
sql_cleanup(
f"""
SELECT elem.id
FROM {MediaElement._table_} elem
WHERE NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND ({sql_is_considered("elem.id")})
"""
)
)
}
return {elem_id: elem_id in res for elem_id in elem_ids}
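# order_by and filter_by are raw SQL fragments evaluated against the
# elem alias; they must come from trusted code, not user input.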
def get_all_considered(
order_by: str = "NULL",
filter_by: str = "true",
) -> List[MediaElement]:
return MediaElement.select_by_sql(
sql_cleanup(
f"""
SELECT elem.*
FROM {MediaElement._table_} elem
WHERE NOT (elem.watched OR elem.ignored)
AND elem.release_date <= NOW()
AND {filter_by}
AND ({sql_is_considered("elem.id")})
ORDER BY {order_by}
"""
)
)
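# Walks a recursive CTE: start from direct element tags, add tags
# inherited through collection membership, then follow tag_tag edges to
# transitive super-tags; only tags with use_for_preferences survive.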
def get_all_elements_tags_recursive() -> Mapping[int, Sequence[Tag]]:
elem_tag_query: Iterable[Tuple[int, int]] = db.execute(
"""
WITH RECURSIVE found_tag (mediaelement_id, tag_id) AS
(
SELECT mediaelement_tag.mediaelement, mediaelement_tag.tag
FROM mediaelement_tag
UNION
SELECT mediacollectionlink.element, mediacollection_tag.tag
FROM mediacollectionlink
JOIN mediacollection_tag ON mediacollectionlink.collection = mediacollection_tag.mediacollection
UNION
SELECT found_tag.mediaelement_id, tag_tag.tag_2
FROM found_tag
JOIN tag_tag ON found_tag.tag_id = tag_tag.tag
)
SELECT found_tag.mediaelement_id, found_tag.tag_id
FROM found_tag
JOIN tag ON found_tag.tag_id = tag.id
WHERE tag.use_for_preferences
ORDER BY mediaelement_id, tag_id;
"""
)
@cache
def get_tag(tag_id: int) -> Tag:
return Tag[tag_id]
return {
elem_id: [get_tag(tag_id) for _, tag_id in group_iter]
for elem_id, group_iter in itertools.groupby(
elem_tag_query, key=lambda row: row[0]
)
}