mirror of
https://github.com/flibusta-apps/book_library_server.git
synced 2025-12-06 15:15:36 +01:00
New TGRM search implementation
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from typing import Optional
|
||||
from datetime import date
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -44,6 +45,9 @@ class AuthorBook(BaseModel):
|
||||
title: str
|
||||
lang: str
|
||||
file_type: str
|
||||
available_types: list[str]
|
||||
uploaded: date
|
||||
annotation_exists: bool
|
||||
|
||||
class Config(ORJSONConfig):
|
||||
pass
|
||||
|
||||
@@ -3,11 +3,40 @@ from app.models import Author
|
||||
from app.services.common import TRGMSearchService
|
||||
|
||||
|
||||
# Raw SQL used by the author search service. ``:query`` is a bound parameter
# supplied at execution time.
#
# The statement returns a single row whose only column is an array of author
# ids. Authors are kept when any of the three name concatenations matches
# ``:query`` via the ``%`` operator AND at least one non-deleted book is linked
# to them through ``book_authors``. Results are ordered by the best of the
# three ``similarity()`` scores, then by the number of non-deleted books.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_authors AS (
        SELECT
            id,
            GREATEST(
                similarity((last_name || ' ' || first_name || ' ' || middle_name), :query),
                similarity((last_name || ' ' || first_name), :query),
                similarity((last_name), :query)
            ) as sml,
            (
                SELECT count(*) FROM book_authors
                LEFT JOIN books ON books.id = book
                WHERE author = authors.id AND books.is_deleted = 'f'
            ) as books_count
        FROM authors
        WHERE (
            (last_name || ' ' || first_name || ' ' || middle_name) % :query OR
            (last_name || ' ' || first_name) % :query OR
            (last_name) % :query
        ) AND
        EXISTS (
            SELECT * FROM book_authors
            LEFT JOIN books ON books.id = book
            WHERE author = authors.id AND books.is_deleted = 'f'
        )
    )
    SELECT fauthors.id FROM filtered_authors as fauthors
    ORDER BY fauthors.sml DESC, fauthors.books_count DESC
);
"""
|
||||
|
||||
|
||||
class AuthorTGRMSearchService(TRGMSearchService):
    """Search service for ``Author``, configured entirely via class attributes."""

    # Model whose rows the service returns.
    MODEL_CLASS = Author

    # Name columns of the authors table.
    FIELDS = [
        Author.Meta.table.c.last_name,
        Author.Meta.table.c.first_name,
        Author.Meta.table.c.middle_name,
    ]

    # Relations loaded together with each author.
    PREFETCH_RELATED = ["source", "annotations"]

    # Raw SQL (module-level constant) that yields the matching author ids.
    GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||
|
||||
@@ -8,15 +8,22 @@ from app.services.common import TRGMSearchService
|
||||
from app.serializers.book import CreateBook, CreateRemoteBook
|
||||
|
||||
|
||||
# Raw SQL used by the book search service. ``:query`` is a bound parameter
# supplied at execution time.
#
# The statement returns a single row whose only column is an array of book
# ids whose title matches ``:query`` via the ``%`` operator, ordered by
# ``similarity()`` (best first) and then by id for a stable order.
#
# Deleted books are excluded here because this raw statement is executed
# as-is, so any class-level filter expressions are not applied to it. The
# author and sequence queries exclude deleted books the same way.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_books AS (
        SELECT id, similarity(title, :query) as sml FROM books
        WHERE books.title % :query AND books.is_deleted = 'f'
    )
    SELECT fbooks.id FROM filtered_books as fbooks
    ORDER BY fbooks.sml DESC, fbooks.id
);
"""
|
||||
|
||||
|
||||
class BookTGRMSearchService(TRGMSearchService):
    """Search service for ``BookDB``, configured entirely via class attributes."""

    # Model whose rows the service returns.
    MODEL_CLASS = BookDB

    # Title column of the books table.
    FIELDS = [
        BookDB.Meta.table.c.title,
    ]

    # Relations loaded together with each book.
    PREFETCH_RELATED = ["source", "authors", "annotations"]

    # Column expression selecting non-deleted books. ``== False`` is
    # intentional: it builds a SQL comparison, so it must not become ``is False``.
    FILTERS = [
        BookDB.Meta.table.c.is_deleted == False,  # noqa: E712
    ]

    # Raw SQL (module-level constant) that yields the matching book ids.
    GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||
|
||||
|
||||
class BookCreator:
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from typing import Optional, Generic, TypeVar, Union
|
||||
from itertools import permutations
|
||||
from databases import Database
|
||||
import json
|
||||
|
||||
from fastapi_pagination.api import resolve_params
|
||||
from fastapi_pagination.bases import AbstractParams, RawParams
|
||||
@@ -10,17 +8,7 @@ import aioredis
|
||||
import orjson
|
||||
|
||||
from ormar import Model, QuerySet
|
||||
from sqlalchemy import text, func, select, or_, Table, Column, cast, Text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
def join_fields(fields):
|
||||
result = fields[0]
|
||||
|
||||
for el in fields[1:]:
|
||||
result += text("' '") + el
|
||||
|
||||
return result
|
||||
from sqlalchemy import Table
|
||||
|
||||
|
||||
T = TypeVar('T', bound=Model)
|
||||
@@ -28,10 +16,9 @@ T = TypeVar('T', bound=Model)
|
||||
|
||||
class TRGMSearchService(Generic[T]):
|
||||
MODEL_CLASS: Optional[T] = None
|
||||
FIELDS: Optional[list[Column]] = None
|
||||
SELECT_RELATED: Optional[Union[list[str], str]] = None
|
||||
PREFETCH_RELATED: Optional[Union[list[str], str]] = None
|
||||
FILTERS = []
|
||||
GET_OBJECT_IDS_QUERY: Optional[str] = None
|
||||
CACHE_TTL = 5 * 60
|
||||
|
||||
@classmethod
|
||||
@@ -60,52 +47,18 @@ class TRGMSearchService(Generic[T]):
|
||||
|
||||
@classmethod
|
||||
@property
|
||||
def fields_combinations(cls):
|
||||
assert cls.FIELDS is not None, f"FIELDS in {cls.__name__} don't set!"
|
||||
assert len(cls.FIELDS) != 0, f"FIELDS in {cls.__name__} must be not empty!"
|
||||
|
||||
return permutations(cls.FIELDS, len(cls.FIELDS))
|
||||
|
||||
@classmethod
|
||||
def get_similarity_subquery(cls, query: str):
|
||||
combs = cls.fields_combinations
|
||||
|
||||
return func.greatest(
|
||||
*[func.similarity(join_fields(comb), cast(query, Text)) for comb in combs]
|
||||
).label("sml")
|
||||
|
||||
@classmethod
|
||||
def get_similarity_filter_subquery(cls, query: str):
|
||||
return or_(
|
||||
*[join_fields(comb) % f"{query}::text" for comb in cls.fields_combinations]
|
||||
)
|
||||
def object_ids_query(cls) -> str:
|
||||
assert cls.GET_OBJECT_IDS_QUERY is not None, f"GET_OBJECT_IDS_QUERY in {cls.__name__} don't set!"
|
||||
return cls.GET_OBJECT_IDS_QUERY
|
||||
|
||||
@classmethod
async def _get_object_ids(cls, query_data: str) -> list[int]:
    """Run the class's raw ID query and return the resulting id array.

    ``query_data`` is bound to the ``:query`` parameter of
    ``cls.object_ids_query``. The statement is expected to produce one row
    with a column named ``array``.

    Raises:
        ValueError: if the database returns no row at all.
    """
    # Only the raw-SQL path remains; the previous SQLAlchemy-built query
    # (similarity sub-queries, CTE and the ``array_agg_1`` result key) has
    # been dropped, so there is exactly one fetch and one return.
    row = await cls.database.fetch_one(cls.object_ids_query, {"query": query_data})

    if row is None:
        raise ValueError('Something is wrong!')

    return row['array']
|
||||
|
||||
@classmethod
|
||||
def get_cache_key(cls, query_data: str) -> str:
|
||||
|
||||
@@ -3,9 +3,32 @@ from app.models import Sequence
|
||||
from app.services.common import TRGMSearchService
|
||||
|
||||
|
||||
# Raw SQL used by the sequence search service. ``:query`` is a bound
# parameter supplied at execution time.
#
# The statement returns a single row whose only column is an array of
# sequence ids. Sequences are kept when their name matches ``:query`` via the
# ``%`` operator AND at least one non-deleted book is linked to them through
# ``book_sequences``. Results are ordered by ``similarity()`` and then by the
# number of non-deleted books.
#
# NOTE: the statement must start with a plain SELECT. A leftover
# ``EXPLAIN ANALYZE`` prefix would make the database return the query plan
# instead of the id array the caller reads.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_sequences AS (
        SELECT
            id,
            similarity(name, :query) as sml,
            (
                SELECT count(*) FROM book_sequences
                LEFT JOIN books ON books.id = book
                WHERE sequence = sequences.id AND books.is_deleted = 'f'
            ) as books_count
        FROM sequences
        WHERE name % :query AND
        EXISTS (
            SELECT * FROM book_sequences
            LEFT JOIN books ON books.id = book
            WHERE sequence = sequences.id AND books.is_deleted = 'f'
        )
    )
    SELECT fsequences.id FROM filtered_sequences as fsequences
    ORDER BY fsequences.sml DESC, fsequences.books_count DESC
);
"""
|
||||
|
||||
|
||||
class SequenceTGRMSearchService(TRGMSearchService):
    """Search service for ``Sequence``, configured entirely via class attributes."""

    # Model whose rows the service returns.
    MODEL_CLASS = Sequence

    # Name column of the sequences table.
    FIELDS = [
        Sequence.Meta.table.c.name
    ]

    # Relations loaded together with each sequence.
    PREFETCH_RELATED = ["source"]

    # The attribute name must be GET_OBJECT_IDS_QUERY (singular "OBJECT"):
    # that is the name the base service declares and checks before running
    # the query. The module-level constant keeps its own (plural) name.
    GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Protocol, TypeVar, Any, Generic, Sequence, runtime_checkable
|
||||
|
||||
from pydantic import PositiveInt
|
||||
from pydantic import conint
|
||||
|
||||
from fastapi_pagination import Page, Params
|
||||
from fastapi_pagination.bases import AbstractParams
|
||||
@@ -16,7 +16,7 @@ T = TypeVar('T', ToDict, Any)
|
||||
|
||||
|
||||
class CustomPage(Page[T], Generic[T]):
|
||||
total_pages: PositiveInt
|
||||
total_pages: conint(ge=0) # type: ignore
|
||||
|
||||
@classmethod
|
||||
def create(
|
||||
|
||||
@@ -69,7 +69,7 @@ async def get_author_annotation(id: int):
|
||||
# Paginated list of one author's books, ordered by title.
@author_router.get("/{id}/books", response_model=CustomPage[AuthorBook], dependencies=[Depends(Params)])
async def get_author_books(id: int):
    # Filter through the ``authors`` relation (plural). The related
    # ``source`` and ``annotations`` rows are fetched in the same query so
    # the response serializer has them available.
    return await paginate(
        BookDB.objects.select_related(["source", "annotations"]).filter(authors__id=id).order_by('title')
    )
|
||||
|
||||
|
||||
|
||||
@@ -6,8 +6,9 @@ from fastapi_pagination import Params
|
||||
from fastapi_pagination.ext.ormar import paginate
|
||||
from app.utils.pagination import CustomPage
|
||||
|
||||
from app.models import Book as BookDB, Author as AuthorDB, AuthorAnnotation as AuthorAnnotationDB
|
||||
from app.models import Book as BookDB, Author as AuthorDB, BookAnnotation as BookAnnotationDB
|
||||
from app.serializers.book import Book, RemoteBook, BookDetail, CreateBook, UpdateBook, CreateRemoteBook
|
||||
from app.serializers.book_annotation import BookAnnotation
|
||||
from app.services.book import BookTGRMSearchService, BookCreator
|
||||
from app.filters.book import get_book_filter
|
||||
from app.depends import check_token
|
||||
@@ -82,9 +83,9 @@ async def update_book(id: int, data: UpdateBook):
|
||||
return book
|
||||
|
||||
|
||||
@book_router.get("/{id}/annotation")
|
||||
@book_router.get("/{id}/annotation", response_model=BookAnnotation)
|
||||
async def get_book_annotation(id: int):
|
||||
annotation = await AuthorAnnotationDB.objects.get(book__id=id)
|
||||
annotation = await BookAnnotationDB.objects.get(book__id=id)
|
||||
|
||||
if annotation is None:
|
||||
raise HTTPException(status.HTTP_404_NOT_FOUND)
|
||||
|
||||
Reference in New Issue
Block a user