From 64e1fd33e7e7d1cfd0bbbc3b6b060aa2257463bf Mon Sep 17 00:00:00 2001
From: Kurbanov Bulat
Date: Wed, 2 Feb 2022 20:42:24 +0300
Subject: [PATCH] Update documents structures

---
Review notes (text between the three-dash line and the first "diff --git"
line is not part of the commit message):

* Fix in update_books: the paged SELECT now carries the same
  "WHERE is_deleted = 'f'" predicate as the count query. Before, the loop
  bound was the number of non-deleted books while the pages were cut from
  all books, so the highest-id rows were never indexed whenever deleted
  rows existed, and deleted books were still pushed to the index even
  though "is_deleted" is no longer a filterable attribute.
* Hunk headers and the diffstat were recounted for the one added line. The
  commit id on the first line and the post-image blob id on the "index"
  line predate this change.
* Line breaks, blank context lines and indentation were reconstructed from
  a whitespace-collapsed copy of the patch. If context does not match the
  target tree, apply with `git apply --ignore-whitespace`.

 src/app/services.py   | 41 ++++++++++++++++++++++++++++++++++-------
 src/core/setup_arq.py |  3 ++-
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/app/services.py b/src/app/services.py
index 7c76b9f..af3712d 100644
--- a/src/app/services.py
+++ b/src/app/services.py
@@ -31,12 +31,22 @@ async def update_books(ctx) -> bool:
     postgres_pool = await get_postgres_pool()
 
     with concurrent.futures.ThreadPoolExecutor() as pool:
-        count = await postgres_pool.fetchval("SELECT count(*) FROM books;")
+        count = await postgres_pool.fetchval(
+            "SELECT count(*) FROM books WHERE is_deleted = 'f';"
+        )
 
         for offset in range(0, count, 4096):
             rows = await postgres_pool.fetch(
-                "SELECT id, title, is_deleted FROM books"
-                f" ORDER BY id LIMIT 4096 OFFSET {offset}"
+                "SELECT id, title, lang, "
+                " array(SELECT author FROM book_authors "
+                " WHERE books.id = book_authors.book) as authors, "
+                " array(SELECT author FROM translations "
+                " WHERE books.id = translations.book) as translators, "
+                " array(SELECT sequence FROM book_sequences "
+                " WHERE books.id = book_sequences.book) as sequences "
+                "FROM books "
+                "WHERE is_deleted = 'f' "
+                f"ORDER BY id LIMIT 4096 OFFSET {offset}"
             )
 
             documents = [dict(row) for row in rows]
@@ -44,7 +54,7 @@ async def update_books(ctx) -> bool:
             await loop.run_in_executor(pool, index.add_documents, documents)
 
     index.update_searchable_attributes(["title"])
-    index.update_filterable_attributes(["is_deleted"])
+    index.update_filterable_attributes(["lang", "authors", "translators", "sequences"])
 
     return True
 
@@ -62,8 +72,15 @@ async def update_authors(ctx) -> bool:
 
         for offset in range(0, count, 4096):
             rows = await postgres_pool.fetch(
-                "SELECT id, first_name, last_name, middle_name FROM authors"
-                f" ORDER BY id LIMIT 4096 OFFSET {offset}"
+                "SELECT id, first_name, last_name, middle_name, "
+                " array("
+                " SELECT DISTINCT lang FROM book_authors "
+                " LEFT JOIN books ON book = books.id "
+                " WHERE authors.id = book_authors.author "
+                " AND books.is_deleted = 'f' "
+                " ) as langs "
+                "FROM authors "
+                f"ORDER BY id LIMIT 4096 OFFSET {offset}"
             )
 
             documents = [dict(row) for row in rows]
@@ -71,6 +88,7 @@ async def update_authors(ctx) -> bool:
             await loop.run_in_executor(pool, index.add_documents, documents)
 
     index.update_searchable_attributes(["first_name", "last_name", "middle_name"])
+    index.update_filterable_attributes(["langs"])
 
     return True
 
@@ -88,7 +106,15 @@ async def update_sequences(ctx) -> bool:
 
         for offset in range(0, count, 4096):
             rows = await postgres_pool.fetch(
-                f"SELECT id, name FROM sequences ORDER BY id LIMIT 4096 OFFSET {offset}"
+                "SELECT id, name, "
+                " array("
+                " SELECT DISTINCT lang FROM book_sequences "
+                " LEFT JOIN books ON book = books.id "
+                " WHERE sequences.id = book_sequences.sequence "
+                " AND books.is_deleted = 'f' "
+                " ) as langs "
+                "FROM sequences "
+                f"ORDER BY id LIMIT 4096 OFFSET {offset}"
             )
 
             documents = [dict(row) for row in rows]
@@ -96,6 +122,7 @@ async def update_sequences(ctx) -> bool:
             await loop.run_in_executor(pool, index.add_documents, documents)
 
     index.update_searchable_attributes(["name"])
+    index.update_filterable_attributes(["langs"])
 
     return True
 
diff --git a/src/core/setup_arq.py b/src/core/setup_arq.py
index b2ab95c..450d509 100644
--- a/src/core/setup_arq.py
+++ b/src/core/setup_arq.py
@@ -10,4 +10,5 @@ class WorkerSettings:
     functions = [update, update_books, update_authors, update_sequences]
     on_startup = startup
     redis_settings = get_redis_settings()
-    max_jobs = 2
+    max_jobs = 3
+    job_timeout = 15 * 60