Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
PG_USER=welearn_datastack
PG_PASSWORD=...
PG_HOST=...
PG_PORT=5432
PG_DB=welearn_datastack
PG_DRIVER=postgresql+psycopg2
PG_SCHEMA=document_related,corpus_related,user_related,agent_related
LOG_LEVEL=INFO
LOG_FORMAT=[%(asctime)s][%(name)s][%(levelname)s] - %(message)s
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "welearn-database"
version = "1.2.0"
version = "1.3.0"
description = "All stuff related to the relational database of the WeLearn project"
authors = [
{name = "Théo",email = "theo.nardin@cri-paris.org"}
Expand Down
195 changes: 195 additions & 0 deletions tests/test_user_related.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import uuid
from datetime import datetime, timedelta
from unittest import TestCase

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from tests.helpers import handle_schema_with_sqlite
from welearn_database.data.models import Base
from welearn_database.data.models.user_related import (
APIKeyManagement,
Bookmark,
ChatMessage,
DataCollectionCampaignManagement,
EndpointRequest,
InferredUser,
ReturnedDocument,
)
from welearn_database.data.models.user_related import Session as UserSession
from welearn_database.data.models.user_related import (
UserProfile,
)


class TestUserRelatedCRUD(TestCase):
    """Create-then-read round trips for the ``user_related`` models.

    ``setUp`` builds every table on a fresh ``sqlite://`` engine and opens a
    session; ``tearDown`` closes the session and drops the tables again.
    """

    def setUp(self):
        self.engine = create_engine("sqlite://")
        handle_schema_with_sqlite(self.engine)
        self.s_maker = sessionmaker(self.engine)
        Base.metadata.create_all(self.engine)
        self.session = self.s_maker()

    def tearDown(self):
        self.session.close()
        Base.metadata.drop_all(self.engine)

    def _persist(self, entity):
        """Add *entity* to the test session, commit, and return it."""
        self.session.add(entity)
        self.session.commit()
        return entity

    def _persist_inferred_user(self):
        """Insert and return an ``InferredUser`` that only has a random id."""
        return self._persist(InferredUser(id=uuid.uuid4()))

    def _persist_user_session(self, owner):
        """Insert and return a ``UserSession`` owned by *owner*, ending in one hour."""
        return self._persist(
            UserSession(
                id=uuid.uuid4(),
                inferred_user_id=owner.id,
                origin_referrer="ref",
                end_at=datetime.now() + timedelta(hours=1),
                host="localhost",
            )
        )

    def _persist_chat_message(self, owner, text):
        """Insert and return a ``ChatMessage`` with role ``"user"`` for *owner*."""
        return self._persist(
            ChatMessage(
                id=uuid.uuid4(),
                inferred_user_id=owner.id,
                conversation_id=uuid.uuid4(),
                role="user",
                textual_content=text,
            )
        )

    def test_create_and_read_user_profile(self):
        self._persist(
            UserProfile(
                id=uuid.uuid4(),
                username="testuser",
                email="test@example.com",
                password_digest=b"hashed",
            )
        )
        profiles = self.session.query(UserProfile).filter_by(username="testuser")
        found = profiles.first()
        self.assertIsNotNone(found)
        self.assertEqual(found.email, "test@example.com")

    def test_create_and_read_inferred_user(self):
        self._persist(InferredUser(id=uuid.uuid4(), origin_referrer="test_ref"))
        matching = self.session.query(InferredUser).filter_by(
            origin_referrer="test_ref"
        )
        self.assertIsNotNone(matching.first())

    def test_create_and_read_session(self):
        owner = self._persist_inferred_user()
        self._persist_user_session(owner)
        found = self.session.query(UserSession).filter_by(host="localhost").first()
        self.assertIsNotNone(found)

    def test_create_and_read_api_key_management(self):
        self._persist(
            APIKeyManagement(
                id=uuid.uuid4(),
                title="key1",
                register_email="reg@example.com",
                digest=b"digest",
                is_active=True,
            )
        )
        found = self.session.query(APIKeyManagement).filter_by(title="key1").first()
        self.assertIsNotNone(found)
        self.assertTrue(found.is_active)
        self.assertEqual(found.digest, b"digest")

    def test_create_and_read_data_collection_campaign_management(self):
        tomorrow = datetime.now() + timedelta(days=1)
        self._persist(
            DataCollectionCampaignManagement(
                id=uuid.uuid4(),
                is_active=True,
                end_at=tomorrow,
            )
        )
        found = self.session.query(DataCollectionCampaignManagement).first()
        self.assertIsNotNone(found)
        self.assertTrue(found.is_active)

    def test_create_and_read_chat_message(self):
        owner = self._persist_inferred_user()
        self._persist_chat_message(owner, "Bonjour")
        found = self.session.query(ChatMessage).filter_by(role="user").first()
        self.assertIsNotNone(found)
        self.assertEqual(found.textual_content, "Bonjour")

    def test_create_and_read_bookmark(self):
        owner = self._persist_inferred_user()
        # Assumes the document already exists; otherwise the FK has to be
        # mocked or ignored. Here document_id is a fresh random UUID and no
        # document row is inserted by this test.
        # NOTE(review): presumably this passes because the FK is not enforced
        # in the SQLite test setup - confirm.
        self._persist(
            Bookmark(
                id=uuid.uuid4(),
                document_id=uuid.uuid4(),
                inferred_user_id=owner.id,
            )
        )
        matching = self.session.query(Bookmark).filter_by(inferred_user_id=owner.id)
        self.assertIsNotNone(matching.first())

    def test_create_and_read_returned_document(self):
        owner = self._persist_inferred_user()
        message = self._persist_chat_message(owner, "test")
        self._persist(
            ReturnedDocument(
                id=uuid.uuid4(),
                message_id=message.id,
                document_id=uuid.uuid4(),
                is_clicked=True,
            )
        )
        clicked = self.session.query(ReturnedDocument).filter_by(is_clicked=True)
        self.assertIsNotNone(clicked.first())

    def test_create_and_read_endpoint_request(self):
        owner = self._persist_inferred_user()
        user_session = self._persist_user_session(owner)
        self._persist(
            EndpointRequest(
                id=uuid.uuid4(),
                session_id=user_session.id,
                endpoint_name="test_endpoint",
                http_code=200,
                message="ok",
            )
        )
        matching = self.session.query(EndpointRequest).filter_by(
            endpoint_name="test_endpoint"
        )
        found = matching.first()
        self.assertIsNotNone(found)
        self.assertEqual(found.http_code, 200)
9 changes: 9 additions & 0 deletions welearn_database/alembic/env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from logging.config import fileConfig

from alembic import context
Expand Down Expand Up @@ -75,6 +76,14 @@ def run_migrations_online() -> None:
load_dotenv()
connectable = create_sqlalchemy_engine()

if "prod" in os.getenv("PG_HOST").lower():
print("Connecting to production database for migrations!")
input("Press Enter to continue...")
elif "dev" in os.getenv("PG_HOST" "").lower():
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing comma between arguments in os.getenv call. Should be os.getenv('PG_HOST', '') instead of os.getenv('PG_HOST' '').

Suggested change
elif "dev" in os.getenv("PG_HOST" "").lower():
elif "dev" in os.getenv("PG_HOST", "").lower():

Copilot uses AI. Check for mistakes.
print("Connecting to development database for migrations!")
else:
print("Connecting to unknown database for migrations!")

with connectable.connect() as connection:
context.configure(
connection=connection,
Expand Down
100 changes: 100 additions & 0 deletions welearn_database/alembic/versions/2ad4895b2674_data_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""data collection

Revision ID: 2ad4895b2674
Revises: 068312e7800c
Create Date: 2026-01-16 15:55:41.447852

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration this one revises (see "Revises" in the module docstring).
revision: str = "2ad4895b2674"
down_revision: Union[str, None] = "068312e7800c"
# No branch label and no dependency on another revision is declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply the data-collection schema changes in the ``user_related`` schema.

    Steps, in order:

    1. Create the ``data_collection_campaign_management`` table.
    2. On ``chat_message``: add ``role``, ``inferred_user_id`` and
       ``conversation_id``; drop the ``message_user_id_fkey`` foreign key;
       create ``message_inferred_user_id_fkey`` from ``inferred_user_id`` to
       ``inferred_user.id``; drop the ``user_id`` column.
    3. On ``returned_document``: add the ``is_clicked`` boolean.
    """
    op.create_table(
        "data_collection_campaign_management",
        sa.Column(
            "id", sa.Uuid(), server_default=sa.func.gen_random_uuid(), nullable=False
        ),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("end_at", postgresql.TIMESTAMP(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # Must be a SQL expression, not a plain string: SQLAlchemy renders
            # a str server_default as the quoted literal 'NOW()', which
            # PostgreSQL resolves to a fixed timestamp when the table is
            # created instead of calling NOW() on every insert.
            server_default=sa.text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="user_related",
    )

    # NOTE(review): the three columns below are NOT NULL and have no default,
    # so PostgreSQL will reject these statements if chat_message already holds
    # rows. No backfill value can be derived here - confirm the table is empty
    # on every target database or add an explicit backfill step.
    op.add_column(
        "chat_message",
        sa.Column("role", sa.String(), nullable=False),
        schema="user_related",
    )
    op.add_column(
        "chat_message",
        sa.Column("inferred_user_id", sa.Uuid(), nullable=False),
        schema="user_related",
    )
    op.add_column(
        "chat_message",
        sa.Column("conversation_id", sa.Uuid(), nullable=False),
        schema="user_related",
    )

    # Re-point the message owner from user_profile to inferred_user: the old
    # foreign key goes first, then the new one is created, then the old
    # column is removed.
    op.drop_constraint(
        op.f("message_user_id_fkey"),
        "chat_message",
        schema="user_related",
        type_="foreignkey",
    )
    op.create_foreign_key(
        "message_inferred_user_id_fkey",
        "chat_message",
        "inferred_user",
        ["inferred_user_id"],
        ["id"],
        source_schema="user_related",
        referent_schema="user_related",
    )
    op.drop_column("chat_message", "user_id", schema="user_related")

    op.add_column(
        "returned_document",
        sa.Column(
            "is_clicked",
            sa.Boolean(),
            # A server-side default lets the NOT NULL column be added to a
            # table that already contains rows (they are filled with false)
            # and covers rows inserted directly via SQL.
            server_default=sa.false(),
            nullable=False,
        ),
        schema="user_related",
    )


def downgrade() -> None:
    """Revert the data-collection schema changes in the ``user_related`` schema.

    Drops ``returned_document.is_clicked``, restores ``chat_message.user_id``
    with its ``message_user_id_fkey`` foreign key to ``user_profile.id``,
    removes the columns and foreign key that replaced it, and finally drops
    the ``data_collection_campaign_management`` table.
    """
    schema = "user_related"
    chat_table = "chat_message"
    fk_kwargs = {"source_schema": schema, "referent_schema": schema}

    op.drop_column("returned_document", "is_clicked", schema=schema)

    # NOTE(review): user_id comes back as NOT NULL without a default, so this
    # presumably only succeeds while chat_message is empty - confirm.
    restored_user_id = sa.Column("user_id", sa.Uuid(), nullable=False)
    op.add_column(chat_table, restored_user_id, schema=schema)

    # Swap the foreign keys back: inferred_user -> user_profile.
    op.drop_constraint(
        "message_inferred_user_id_fkey",
        chat_table,
        schema=schema,
        type_="foreignkey",
    )
    op.create_foreign_key(
        op.f("message_user_id_fkey"),
        chat_table,
        "user_profile",
        ["user_id"],
        ["id"],
        **fk_kwargs,
    )

    # Remove the columns introduced by the upgrade, in the original order.
    for added_column in ("conversation_id", "inferred_user_id", "role"):
        op.drop_column(chat_table, added_column, schema=schema)

    op.drop_table("data_collection_campaign_management", schema=schema)
29 changes: 26 additions & 3 deletions welearn_database/data/models/user_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,16 @@ class ChatMessage(Base):
id: Mapped[UUID] = mapped_column(
types.Uuid, primary_key=True, nullable=False, server_default="gen_random_uuid()"
)
user_id = mapped_column(
inferred_user_id = mapped_column(
types.Uuid,
ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.user_profile.id"),
ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.inferred_user.id"),
nullable=False,
)
conversation_id = mapped_column(
types.Uuid,
nullable=False,
)
role: Mapped[str]
textual_content: Mapped[str]

created_at: Mapped[datetime] = mapped_column(
Expand All @@ -105,7 +110,7 @@ class ChatMessage(Base):
server_default="NOW()",
onupdate=func.localtimestamp(),
)
user: Mapped["UserProfile"] = relationship()
inferred_user: Mapped["InferredUser"] = relationship()


class ReturnedDocument(Base):
Expand All @@ -128,6 +133,7 @@ class ReturnedDocument(Base):
),
nullable=False,
)
is_clicked: Mapped[bool] = mapped_column(default=False)
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default value should be set using server_default instead of default for consistency with database-level defaults and to ensure the value is set even when rows are inserted directly via SQL.

Suggested change
is_clicked: Mapped[bool] = mapped_column(default=False)
is_clicked: Mapped[bool] = mapped_column(default=False, server_default="false")

Copilot uses AI. Check for mistakes.
welearn_document: Mapped["WeLearnDocument"] = relationship()
chat_message: Mapped["ChatMessage"] = relationship()

Expand Down Expand Up @@ -158,6 +164,23 @@ class APIKeyManagement(Base):
)


class DataCollectionCampaignManagement(Base):
    """ORM mapping for the ``data_collection_campaign_management`` table.

    Each row holds an ``is_active`` flag, an ``end_at`` timestamp and the
    row's ``created_at`` timestamp; both timestamps are stored without
    time zone.
    """

    __tablename__ = "data_collection_campaign_management"
    __table_args__ = {"schema": DbSchemaEnum.USER_RELATED.value}

    # Primary key.
    # NOTE(review): a plain-string server_default is rendered by SQLAlchemy as
    # a quoted literal; kept as-is to match the other models - confirm intent.
    id: Mapped[UUID] = mapped_column(
        types.Uuid,
        server_default="gen_random_uuid()",
        nullable=False,
        primary_key=True,
    )
    # Required boolean; no default is declared, so callers must supply it.
    is_active: Mapped[bool]
    end_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
    )
    # Filled client-side with LOCALTIMESTAMP when the ORM inserts the row.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        server_default="NOW()",
        default=func.localtimestamp(),
        nullable=False,
    )


class Session(Base):
__tablename__ = "session"
__table_args__ = {"schema": "user_related"}
Expand Down
Loading