Skip to content

Commit 29af0d8

Browse files
authored
Merge pull request #131 from openzim/generate_library
Generate library
2 parents 7bef76c + 4c0dcdf commit 29af0d8

File tree

14 files changed

+901
-6
lines changed

14 files changed

+901
-6
lines changed

backend/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,8 @@ extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"]
216216
ban-relative-imports = "all"
217217

218218
[tool.ruff.lint.per-file-ignores]
219-
# Tests can use magic values, assertions, and relative imports
220-
"tests/**/*" = ["PLR2004", "S101", "TID252"]
219+
# Tests can use magic values, assertions, relative imports and unsafe XML
220+
"tests/**/*" = ["PLR2004", "S101", "TID252", "S314"]
221221
# Ignore invalid module names for scripts
222222
"src/cms_backend/periodic-tasks.py" = ["N999"]
223223
"src/cms_backend/periodic-scheduler.py" = ["N999"]

backend/src/cms_backend/api/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from cms_backend.api.routes.books import router as books_router
1313
from cms_backend.api.routes.healthcheck import router as healthcheck_router
1414
from cms_backend.api.routes.http_errors import BadRequestError
15+
from cms_backend.api.routes.library import router as library_router
1516
from cms_backend.api.routes.titles import router as titles_router
1617
from cms_backend.api.routes.warehouse_paths import router as warehouse_paths_router
1718
from cms_backend.api.routes.zimfarm_notifications import (
@@ -58,6 +59,7 @@ def create_app(*, debug: bool = True):
5859
main_router.include_router(router=titles_router)
5960
main_router.include_router(router=books_router)
6061
main_router.include_router(router=warehouse_paths_router)
62+
main_router.include_router(router=library_router)
6163

6264
app.include_router(router=main_router)
6365

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from http import HTTPStatus
2+
from typing import Annotated
3+
from uuid import UUID
4+
from xml.etree import ElementTree as ET
5+
6+
from fastapi import APIRouter, Depends, Path
7+
from fastapi.responses import Response
8+
from sqlalchemy.orm import Session as OrmSession
9+
10+
from cms_backend.db import gen_dbsession
11+
from cms_backend.db.exceptions import RecordDoesNotExistError
12+
from cms_backend.db.library import (
13+
get_latest_books_for_library,
14+
get_library,
15+
get_library_by_name_or_none,
16+
)
17+
from cms_backend.db.models import Book
18+
19+
router = APIRouter(prefix="/libraries", tags=["libraries"])
20+
21+
22+
def _build_library_xml(books: list[Book]) -> str:
    """Build the XML library catalog for the given books.

    Each book with a non-empty ``zim_metadata`` becomes one ``<book>`` child of
    the root ``<library>`` element; books without metadata are skipped.

    Metadata values are converted to text before being stored as attributes:
    ``None`` becomes an empty string and any other value goes through ``str()``.
    Without this, a null or non-string metadata value would make
    ``ET.tostring`` raise ``TypeError`` and break the whole catalog.

    Args:
        books: books to list in the catalog.

    Returns:
        The serialized ``<library>`` element as a string (no XML declaration).
    """

    def _as_text(value: object) -> str:
        """Return ``value`` as attribute text, mapping ``None`` to ``""``."""
        return "" if value is None else str(value)

    # XML attribute name -> zim_metadata key, for attributes that are always
    # written (empty string when the metadata entry is missing).
    always_present = (
        ("title", "Title"),
        ("description", "Description"),
        ("language", "Language"),
        ("creator", "Creator"),
        ("publisher", "Publisher"),
        ("name", "Name"),
        ("date", "Date"),
    )

    library_elem = ET.Element("library")
    library_elem.set("version", "20110515")

    for book in books:
        zim_meta = book.zim_metadata
        if not zim_meta:
            continue

        book_elem = ET.SubElement(library_elem, "book")

        # Attributes taken from the book record itself
        book_elem.set("id", str(book.id))
        book_elem.set("size", str(book.size))
        book_elem.set("mediaCount", str(book.media_count))
        book_elem.set("articleCount", str(book.article_count))

        # Attributes taken from the zim_metadata dict
        for attr_name, meta_key in always_present:
            book_elem.set(attr_name, _as_text(zim_meta.get(meta_key)))

        # Tags are copied as stored in the metadata, only when non-empty
        tags = _as_text(zim_meta.get("Tags"))
        if tags:
            book_elem.set("tags", tags)

        # The favicon comes from the 48x48 illustration metadata entry, when
        # present; its MIME type is always declared as PNG.
        favicon = _as_text(zim_meta.get("Illustration_48x48@1"))
        if favicon:
            book_elem.set("favicon", favicon)
            book_elem.set("faviconMimeType", "image/png")

        # The URL is only emitted when the metadata carries one; it is not
        # derived from warehouse configuration here.
        url = _as_text(zim_meta.get("URL"))
        if url:
            book_elem.set("url", url)

    return ET.tostring(library_elem, encoding="unicode")
68+
69+
70+
@router.get("/{library_id_or_name}/catalog.xml")
async def get_library_catalog_xml(
    library_id_or_name: Annotated[str, Path()],
    session: Annotated[OrmSession, Depends(gen_dbsession)],
):
    """Get library catalog as XML. Library can be specified by ID (UUID) or name.

    The path value is first tried as a library ID when it parses as a UUID.
    If no library has that ID, or the value is not a UUID, it is looked up as
    a library name. This keeps a library reachable by name even when the name
    happens to be a valid UUID string.

    Returns:
        A 200 ``application/xml`` response with the catalog of the latest
        books of the library, or a 404 ``application/xml`` response holding
        an empty ``<library>`` document when no library matches.
    """
    # Keep the try body minimal: only the UUID parsing can raise ValueError.
    try:
        library_id = UUID(library_id_or_name)
    except ValueError:
        library_id = None

    library = None
    if library_id is not None:
        try:
            library = get_library(session, library_id)
        except RecordDoesNotExistError:
            # No library with this ID; the value may still be a library name.
            library = None

    if library is None:
        library = get_library_by_name_or_none(session, library_id_or_name)

    if library is None:
        return Response(
            content='<?xml version="1.0" encoding="UTF-8"?>'
            '<library version="20110515"></library>',
            status_code=HTTPStatus.NOT_FOUND,
            media_type="application/xml",
        )

    books = get_latest_books_for_library(session, library.id)
    xml_content = _build_library_xml(books)

    return Response(
        content=xml_content,
        status_code=HTTPStatus.OK,
        media_type="application/xml",
    )
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from uuid import UUID
2+
3+
from sqlalchemy import and_, select
4+
from sqlalchemy.orm import Session as OrmSession
5+
6+
from cms_backend.db.exceptions import RecordDoesNotExistError
7+
from cms_backend.db.models import Book, BookLocation, Library, LibraryWarehousePath
8+
9+
10+
def get_library_or_none(session: OrmSession, library_id: UUID) -> Library | None:
    """Return the library with the given ID, or None when there is none."""
    by_id = select(Library).where(Library.id == library_id)
    return session.scalars(by_id).one_or_none()
15+
16+
17+
def get_library(session: OrmSession, library_id: UUID) -> Library:
    """Return the library with the given ID.

    Raises:
        RecordDoesNotExistError: when no library has this ID.
    """
    library = get_library_or_none(session, library_id=library_id)
    if library is None:
        raise RecordDoesNotExistError(f"Library with ID {library_id} does not exist")
    return library
22+
23+
24+
def get_library_by_name_or_none(
    session: OrmSession, library_name: str
) -> Library | None:
    """Return the library with the given name, or None when there is none."""
    by_name = select(Library).where(Library.name == library_name)
    return session.scalars(by_name).one_or_none()
31+
32+
33+
def get_latest_books_for_library(session: OrmSession, library_id: UUID) -> list[Book]:
    """
    Return the newest book of every name+flavour pair found in a library.

    Candidates are the published books that have a "current" location in one
    of the warehouse paths attached to the library. Among candidates sharing
    the same name and flavour, only the one with the most recent created_at
    is kept.

    Args:
        session: ORM session
        library_id: ID of the library

    Returns:
        List of Book objects, one per name+flavour combination, ordered by
        name then flavour
    """
    # Published books currently located in one of the library's warehouse paths
    in_library_and_live = and_(
        LibraryWarehousePath.library_id == library_id,
        BookLocation.status == "current",
        Book.status == "published",
    )
    query = (
        select(Book)
        .join(BookLocation)
        .join(
            LibraryWarehousePath,
            BookLocation.warehouse_path_id == LibraryWarehousePath.warehouse_path_id,
        )
        .where(in_library_and_live)
        .order_by(Book.name, Book.flavour, Book.created_at.desc())
    )

    # Rows arrive newest-first within each name+flavour group, so the first
    # row seen for a key is the one to keep; later rows never overwrite it.
    newest_by_key: dict[tuple[str | None, str | None], Book] = {}
    for candidate in session.scalars(query).all():
        newest_by_key.setdefault((candidate.name, candidate.flavour), candidate)

    return list(newest_by_key.values())

backend/src/cms_backend/db/models.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,3 +246,33 @@ def full_str(self) -> str:
246246
f"{self.warehouse_path.warehouse.name}:"
247247
f"{self.warehouse_path.folder_name}/{self.filename}"
248248
)
249+
250+
251+
class Library(Base):
    """ORM model for the ``library`` table: a uniquely named library.

    A library is linked to warehouse paths through ``LibraryWarehousePath``
    rows, exposed here as ``warehouse_paths``.
    """

    __tablename__ = "library"
    # Primary key; the value comes from the uuid_generate_v4() server default
    # and the column is declared with init=False.
    id: Mapped[UUID] = mapped_column(
        init=False, primary_key=True, server_default=text("uuid_generate_v4()")
    )
    # Library name, declared unique and indexed
    name: Mapped[str] = mapped_column(unique=True, index=True)

    # Warehouse paths via junction table; the "all, delete-orphan" cascade
    # ties the link rows to this library.
    warehouse_paths: Mapped[list["LibraryWarehousePath"]] = relationship(
        back_populates="library",
        cascade="all, delete-orphan",
        init=False,
    )
264+
265+
266+
class LibraryWarehousePath(Base):
    """ORM model for the ``library_warehouse_path`` junction table.

    Each row links one library to one warehouse path; the two foreign keys
    together form the primary key.
    """

    __tablename__ = "library_warehouse_path"
    # Foreign key to library.id, part of the composite primary key
    library_id: Mapped[UUID] = mapped_column(
        ForeignKey("library.id"), primary_key=True, init=False
    )
    # Foreign key to warehouse_path.id, part of the composite primary key
    warehouse_path_id: Mapped[UUID] = mapped_column(
        ForeignKey("warehouse_path.id"), primary_key=True, init=False
    )

    # Owning library; mirrors Library.warehouse_paths
    library: Mapped["Library"] = relationship(
        back_populates="warehouse_paths", init=False
    )
    # Linked warehouse path (no back-reference is declared here)
    warehouse_path: Mapped["WarehousePath"] = relationship(init=False)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""Add library and library_warehouse_path tables
2+
3+
Revision ID: add_library_tables
4+
Revises: add_pending_move_index
5+
Create Date: 2025-11-18 00:00:00.000000
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from alembic import op
11+
from sqlalchemy.dialects.postgresql import UUID
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "add_library_tables"
15+
down_revision = "add_pending_move_index"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
    """Create the ``library`` table and the ``library_warehouse_path`` junction table."""
    # Create library table
    # NOTE(review): "name" gets both the uq_library_name unique constraint here
    # and the unique ix_library_name index below; the two look redundant —
    # confirm whether both are intended.
    op.create_table(
        "library",
        sa.Column(
            "id", UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False
        ),
        sa.Column("name", sa.String(), nullable=False),
        sa.PrimaryKeyConstraint("id", name="pk_library"),
        sa.UniqueConstraint("name", name="uq_library_name"),
    )

    # Create unique index on library name for fast lookups
    op.create_index("ix_library_name", "library", ["name"], unique=True)

    # Create library_warehouse_path junction table; created after "library"
    # because it holds a foreign key to library.id
    op.create_table(
        "library_warehouse_path",
        sa.Column("library_id", UUID(), nullable=False),
        sa.Column("warehouse_path_id", UUID(), nullable=False),
        sa.ForeignKeyConstraint(
            ["library_id"],
            ["library.id"],
            name="fk_library_warehouse_path_library_id_library",
        ),
        sa.ForeignKeyConstraint(
            ["warehouse_path_id"],
            ["warehouse_path.id"],
            name="fk_library_warehouse_path_warehouse_path_id_warehouse_path",
        ),
        # Composite primary key: one row per (library, warehouse path) pair
        sa.PrimaryKeyConstraint(
            "library_id", "warehouse_path_id", name="pk_library_warehouse_path"
        ),
    )
54+
55+
56+
def downgrade():
    """Drop the library_warehouse_path and library tables."""
    # The junction table goes first because it holds the foreign key to library.
    for table_name in ("library_warehouse_path", "library"):
        op.drop_table(table_name)

0 commit comments

Comments
 (0)