From 7e825d099bd717e08015ee640ecdcdd4f94d4233 Mon Sep 17 00:00:00 2001 From: Kurbanov Bulat Date: Sun, 5 Dec 2021 23:54:50 +0300 Subject: [PATCH] New TGRM search implementation --- fastapi_book_server/app/serializers/author.py | 4 ++ fastapi_book_server/app/services/author.py | 39 ++++++++++-- fastapi_book_server/app/services/book.py | 19 ++++-- fastapi_book_server/app/services/common.py | 61 +++---------------- fastapi_book_server/app/services/sequence.py | 29 ++++++++- fastapi_book_server/app/utils/pagination.py | 4 +- fastapi_book_server/app/views/author.py | 2 +- fastapi_book_server/app/views/book.py | 7 ++- 8 files changed, 91 insertions(+), 74 deletions(-) diff --git a/fastapi_book_server/app/serializers/author.py b/fastapi_book_server/app/serializers/author.py index 1e08d87..3035184 100644 --- a/fastapi_book_server/app/serializers/author.py +++ b/fastapi_book_server/app/serializers/author.py @@ -1,4 +1,5 @@ from typing import Optional +from datetime import date from pydantic import BaseModel @@ -44,6 +45,9 @@ class AuthorBook(BaseModel): title: str lang: str file_type: str + available_types: list[str] + uploaded: date + annotation_exists: bool class Config(ORJSONConfig): pass diff --git a/fastapi_book_server/app/services/author.py b/fastapi_book_server/app/services/author.py index 82cbdb6..ee1595c 100644 --- a/fastapi_book_server/app/services/author.py +++ b/fastapi_book_server/app/services/author.py @@ -3,11 +3,40 @@ from app.models import Author from app.services.common import TRGMSearchService +GET_OBJECTS_IDS_QUERY = """ +SELECT ARRAY( + WITH filtered_authors AS ( + SELECT + id, + GREATEST( + similarity((last_name || ' ' || first_name || ' ' || middle_name), :query), + similarity((last_name || ' ' || first_name), :query), + similarity((last_name), :query) + ) as sml, + ( + SELECT count(*) FROM book_authors + LEFT JOIN books ON books.id = book + WHERE author = authors.id AND books.is_deleted = 'f' + ) as books_count + FROM authors + WHERE ( + (last_name 
|| ' ' || first_name || ' ' || middle_name) % :query OR + (last_name || ' ' || first_name) % :query OR + (last_name) % :query + ) AND + EXISTS ( + SELECT * FROM book_authors + LEFT JOIN books ON books.id = book + WHERE author = authors.id AND books.is_deleted = 'f' + ) + ) + SELECT fauthors.id FROM filtered_authors as fauthors + ORDER BY fauthors.sml DESC, fauthors.books_count DESC +); +""" + + class AuthorTGRMSearchService(TRGMSearchService): MODEL_CLASS = Author - FIELDS = [ - Author.Meta.table.c.last_name, - Author.Meta.table.c.first_name, - Author.Meta.table.c.middle_name - ] PREFETCH_RELATED = ["source", "annotations"] + GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY diff --git a/fastapi_book_server/app/services/book.py b/fastapi_book_server/app/services/book.py index edc5d63..c0b1a27 100644 --- a/fastapi_book_server/app/services/book.py +++ b/fastapi_book_server/app/services/book.py @@ -8,15 +8,22 @@ from app.services.common import TRGMSearchService from app.serializers.book import CreateBook, CreateRemoteBook +GET_OBJECTS_IDS_QUERY = """ +SELECT ARRAY( + WITH filtered_books AS ( + SELECT id, similarity(title, :query) as sml FROM books + WHERE books.title % :query AND books.is_deleted = 'f' + ) + SELECT fbooks.id FROM filtered_books as fbooks + ORDER BY fbooks.sml DESC, fbooks.id +); +""" + + class BookTGRMSearchService(TRGMSearchService): MODEL_CLASS = BookDB - FIELDS = [ - BookDB.Meta.table.c.title - ] PREFETCH_RELATED = ["source", "authors", "annotations"] - FILTERS = [ - BookDB.Meta.table.c.is_deleted == False, - ] + GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY class BookCreator: diff --git a/fastapi_book_server/app/services/common.py b/fastapi_book_server/app/services/common.py index 8a46a5e..7471764 100644 --- a/fastapi_book_server/app/services/common.py +++ b/fastapi_book_server/app/services/common.py @@ -1,7 +1,5 @@ from typing import Optional, Generic, TypeVar, Union -from itertools import permutations from databases import Database -import json from fastapi_pagination.api 
import resolve_params from fastapi_pagination.bases import AbstractParams, RawParams @@ -10,17 +8,7 @@ import aioredis import orjson from ormar import Model, QuerySet -from sqlalchemy import text, func, select, or_, Table, Column, cast, Text -from sqlalchemy.orm import Session - - -def join_fields(fields): - result = fields[0] - - for el in fields[1:]: - result += text("' '") + el - - return result +from sqlalchemy import Table T = TypeVar('T', bound=Model) @@ -28,10 +16,9 @@ T = TypeVar('T', bound=Model) class TRGMSearchService(Generic[T]): MODEL_CLASS: Optional[T] = None - FIELDS: Optional[list[Column]] = None SELECT_RELATED: Optional[Union[list[str], str]] = None PREFETCH_RELATED: Optional[Union[list[str], str]] = None - FILTERS = [] + GET_OBJECT_IDS_QUERY: Optional[str] = None CACHE_TTL = 5 * 60 @classmethod @@ -60,52 +47,18 @@ class TRGMSearchService(Generic[T]): @classmethod @property - def fields_combinations(cls): - assert cls.FIELDS is not None, f"FIELDS in {cls.__name__} don't set!" - assert len(cls.FIELDS) != 0, f"FIELDS in {cls.__name__} must be not empty!" - - return permutations(cls.FIELDS, len(cls.FIELDS)) - - @classmethod - def get_similarity_subquery(cls, query: str): - combs = cls.fields_combinations - - return func.greatest( - *[func.similarity(join_fields(comb), cast(query, Text)) for comb in combs] - ).label("sml") - - @classmethod - def get_similarity_filter_subquery(cls, query: str): - return or_( - *[join_fields(comb) % f"{query}::text" for comb in cls.fields_combinations] - ) + def object_ids_query(cls) -> str: + assert cls.GET_OBJECT_IDS_QUERY is not None, f"GET_OBJECT_IDS_QUERY in {cls.__name__} don't set!" 
+ return cls.GET_OBJECT_IDS_QUERY @classmethod async def _get_object_ids(cls, query_data: str) -> list[int]: - similarity = cls.get_similarity_subquery(query_data) - similarity_filter = cls.get_similarity_filter_subquery(query_data) - - session = Session(cls.database.connection()) - - filtered_objects_query = session.query( - cls.table.c.id, similarity - ).order_by( - text('sml DESC') - ).filter( - similarity_filter, - *cls.FILTERS - ).cte('objs') - - object_ids_query = session.query( - func.array_agg(filtered_objects_query.c.id) - ).cte() - - row = await cls.database.fetch_one(object_ids_query) + row = await cls.database.fetch_one(cls.object_ids_query, {"query": query_data}) if row is None: raise ValueError('Something is wrong!') - return row['array_agg_1'] + return row['array'] @classmethod def get_cache_key(cls, query_data: str) -> str: diff --git a/fastapi_book_server/app/services/sequence.py b/fastapi_book_server/app/services/sequence.py index 225de36..70b160a 100644 --- a/fastapi_book_server/app/services/sequence.py +++ b/fastapi_book_server/app/services/sequence.py @@ -3,9 +3,32 @@ from app.models import Sequence from app.services.common import TRGMSearchService +GET_OBJECTS_IDS_QUERY = """ +SELECT ARRAY ( + WITH filtered_sequences AS ( + SELECT + id, + similarity(name, :query) as sml, + ( + SELECT count(*) FROM book_sequences + LEFT JOIN books ON books.id = book + WHERE sequence = sequences.id AND books.is_deleted = 'f' + ) as books_count + FROM sequences + WHERE name % :query AND + EXISTS ( + SELECT * FROM book_sequences + LEFT JOIN books ON books.id = book + WHERE sequence = sequences.id AND books.is_deleted = 'f' + ) + ) + SELECT fsequences.id FROM filtered_sequences as fsequences + ORDER BY fsequences.sml DESC, fsequences.books_count DESC +); +""" + + class SequenceTGRMSearchService(TRGMSearchService): MODEL_CLASS = Sequence - FIELDS = [ - Sequence.Meta.table.c.name - ] PREFETCH_RELATED = ["source"] + GET_OBJECT_IDS_QUERY = 
GET_OBJECTS_IDS_QUERY diff --git a/fastapi_book_server/app/utils/pagination.py b/fastapi_book_server/app/utils/pagination.py index 6df411e..63dbfb2 100644 --- a/fastapi_book_server/app/utils/pagination.py +++ b/fastapi_book_server/app/utils/pagination.py @@ -1,6 +1,6 @@ from typing import Protocol, TypeVar, Any, Generic, Sequence, runtime_checkable -from pydantic import PositiveInt +from pydantic import conint from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractParams @@ -16,7 +16,7 @@ T = TypeVar('T', ToDict, Any) class CustomPage(Page[T], Generic[T]): - total_pages: PositiveInt + total_pages: conint(ge=0) # type: ignore @classmethod def create( diff --git a/fastapi_book_server/app/views/author.py b/fastapi_book_server/app/views/author.py index 31b54c9..7394505 100644 --- a/fastapi_book_server/app/views/author.py +++ b/fastapi_book_server/app/views/author.py @@ -69,7 +69,7 @@ async def get_author_annotation(id: int): @author_router.get("/{id}/books", response_model=CustomPage[AuthorBook], dependencies=[Depends(Params)]) async def get_author_books(id: int): return await paginate( - BookDB.objects.filter(author__id=id).order_by('title') + BookDB.objects.select_related(["source", "annotations"]).filter(authors__id=id).order_by('title') ) diff --git a/fastapi_book_server/app/views/book.py b/fastapi_book_server/app/views/book.py index 128c7be..f491ac5 100644 --- a/fastapi_book_server/app/views/book.py +++ b/fastapi_book_server/app/views/book.py @@ -6,8 +6,9 @@ from fastapi_pagination import Params from fastapi_pagination.ext.ormar import paginate from app.utils.pagination import CustomPage -from app.models import Book as BookDB, Author as AuthorDB, AuthorAnnotation as AuthorAnnotationDB +from app.models import Book as BookDB, Author as AuthorDB, BookAnnotation as BookAnnotationDB from app.serializers.book import Book, RemoteBook, BookDetail, CreateBook, UpdateBook, CreateRemoteBook +from app.serializers.book_annotation import 
BookAnnotation from app.services.book import BookTGRMSearchService, BookCreator from app.filters.book import get_book_filter from app.depends import check_token @@ -82,9 +83,9 @@ async def update_book(id: int, data: UpdateBook): return book -@book_router.get("/{id}/annotation") +@book_router.get("/{id}/annotation", response_model=BookAnnotation) async def get_book_annotation(id: int): - annotation = await AuthorAnnotationDB.objects.get(book__id=id) + annotation = await BookAnnotationDB.objects.get(book__id=id) if annotation is None: raise HTTPException(status.HTTP_404_NOT_FOUND)