diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..ba1a63f
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,9 @@
+PG_USER=welearn_datastack
+PG_PASSWORD=...
+PG_HOST=...
+PG_PORT=5432
+PG_DB=welearn_datastack
+PG_DRIVER=postgresql+psycopg2
+PG_SCHEMA=document_related,corpus_related,user_related,agent_related
+LOG_LEVEL=INFO
+LOG_FORMAT=[%(asctime)s][%(name)s][%(levelname)s] - %(message)s
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4b67a2c..2f1b486 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "welearn-database"
-version = "1.2.0"
+version = "1.3.0"
 description = "All stuff related to relationnal database from the WeLearn project"
 authors = [
     {name = "Théo",email = "theo.nardin@cri-paris.org"}
diff --git a/tests/test_user_related.py b/tests/test_user_related.py
new file mode 100644
index 0000000..4c911c9
--- /dev/null
+++ b/tests/test_user_related.py
@@ -0,0 +1,195 @@
+import uuid
+from datetime import datetime, timedelta
+from unittest import TestCase
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from tests.helpers import handle_schema_with_sqlite
+from welearn_database.data.models import Base
+from welearn_database.data.models.user_related import (
+    APIKeyManagement,
+    Bookmark,
+    ChatMessage,
+    DataCollectionCampaignManagement,
+    EndpointRequest,
+    InferredUser,
+    ReturnedDocument,
+)
+from welearn_database.data.models.user_related import Session as UserSession
+from welearn_database.data.models.user_related import (
+    UserProfile,
+)
+
+
+class TestUserRelatedCRUD(TestCase):
+    def setUp(self):
+        self.engine = create_engine("sqlite://")
+        handle_schema_with_sqlite(self.engine)
+        self.s_maker = sessionmaker(self.engine)
+        Base.metadata.create_all(self.engine)
+        self.session = self.s_maker()
+
+    def tearDown(self):
+        self.session.close()
+        Base.metadata.drop_all(self.engine)
+
+    def test_create_and_read_user_profile(self):
+        user = UserProfile(
+            id=uuid.uuid4(),
+            username="testuser",
+            email="test@example.com",
+            password_digest=b"hashed",
+        )
+        self.session.add(user)
+        self.session.commit()
+        result = self.session.query(UserProfile).filter_by(username="testuser").first()
+        self.assertIsNotNone(result)
+        self.assertEqual(result.email, "test@example.com")
+
+    def test_create_and_read_inferred_user(self):
+        inferred_user = InferredUser(
+            id=uuid.uuid4(),
+            origin_referrer="test_ref",
+        )
+        self.session.add(inferred_user)
+        self.session.commit()
+        result = (
+            self.session.query(InferredUser)
+            .filter_by(origin_referrer="test_ref")
+            .first()
+        )
+        self.assertIsNotNone(result)
+
+    def test_create_and_read_session(self):
+        inferred_user = InferredUser(id=uuid.uuid4())
+        self.session.add(inferred_user)
+        self.session.commit()
+        session = UserSession(
+            id=uuid.uuid4(),
+            inferred_user_id=inferred_user.id,
+            origin_referrer="ref",
+            end_at=datetime.now() + timedelta(hours=1),
+            host="localhost",
+        )
+        self.session.add(session)
+        self.session.commit()
+        result = self.session.query(UserSession).filter_by(host="localhost").first()
+        self.assertIsNotNone(result)
+
+    def test_create_and_read_api_key_management(self):
+        api_key = APIKeyManagement(
+            id=uuid.uuid4(),
+            title="key1",
+            register_email="reg@example.com",
+            digest=b"digest",
+            is_active=True,
+        )
+        self.session.add(api_key)
+        self.session.commit()
+        result = self.session.query(APIKeyManagement).filter_by(title="key1").first()
+        self.assertIsNotNone(result)
+        self.assertTrue(result.is_active)
+        self.assertEqual(result.digest, b"digest")
+
+    def test_create_and_read_data_collection_campaign_management(self):
+        campaign = DataCollectionCampaignManagement(
+            id=uuid.uuid4(),
+            is_active=True,
+            end_at=datetime.now() + timedelta(days=1),
+        )
+        self.session.add(campaign)
+        self.session.commit()
+        result = self.session.query(DataCollectionCampaignManagement).first()
+        self.assertIsNotNone(result)
+        self.assertTrue(result.is_active)
+
+    def test_create_and_read_chat_message(self):
+        inferred_user = InferredUser(id=uuid.uuid4())
+        self.session.add(inferred_user)
+        self.session.commit()
+        chat = ChatMessage(
+            id=uuid.uuid4(),
+            inferred_user_id=inferred_user.id,
+            conversation_id=uuid.uuid4(),
+            role="user",
+            textual_content="Bonjour",
+        )
+        self.session.add(chat)
+        self.session.commit()
+        result = self.session.query(ChatMessage).filter_by(role="user").first()
+        self.assertIsNotNone(result)
+        self.assertEqual(result.textual_content, "Bonjour")
+
+    def test_create_and_read_bookmark(self):
+        inferred_user = InferredUser(id=uuid.uuid4())
+        self.session.add(inferred_user)
+        self.session.commit()
+        # Assume the document already exists; otherwise mock it or ignore the FK
+        bookmark = Bookmark(
+            id=uuid.uuid4(),
+            document_id=uuid.uuid4(),
+            inferred_user_id=inferred_user.id,
+        )
+        self.session.add(bookmark)
+        self.session.commit()
+        result = (
+            self.session.query(Bookmark)
+            .filter_by(inferred_user_id=inferred_user.id)
+            .first()
+        )
+        self.assertIsNotNone(result)
+
+    def test_create_and_read_returned_document(self):
+        inferred_user = InferredUser(id=uuid.uuid4())
+        self.session.add(inferred_user)
+        self.session.commit()
+        chat = ChatMessage(
+            id=uuid.uuid4(),
+            inferred_user_id=inferred_user.id,
+            conversation_id=uuid.uuid4(),
+            role="user",
+            textual_content="test",
+        )
+        self.session.add(chat)
+        self.session.commit()
+        returned_doc = ReturnedDocument(
+            id=uuid.uuid4(),
+            message_id=chat.id,
+            document_id=uuid.uuid4(),
+            is_clicked=True,
+        )
+        self.session.add(returned_doc)
+        self.session.commit()
+        result = self.session.query(ReturnedDocument).filter_by(is_clicked=True).first()
+        self.assertIsNotNone(result)
+
+    def test_create_and_read_endpoint_request(self):
+        inferred_user = InferredUser(id=uuid.uuid4())
+        self.session.add(inferred_user)
+        self.session.commit()
+        session = UserSession(
+            id=uuid.uuid4(),
+            inferred_user_id=inferred_user.id,
+            origin_referrer="ref",
+            end_at=datetime.now() + timedelta(hours=1),
+            host="localhost",
+        )
+        self.session.add(session)
+        self.session.commit()
+        endpoint = EndpointRequest(
+            id=uuid.uuid4(),
+            session_id=session.id,
+            endpoint_name="test_endpoint",
+            http_code=200,
+            message="ok",
+        )
+        self.session.add(endpoint)
+        self.session.commit()
+        result = (
+            self.session.query(EndpointRequest)
+            .filter_by(endpoint_name="test_endpoint")
+            .first()
+        )
+        self.assertIsNotNone(result)
+        self.assertEqual(result.http_code, 200)
diff --git a/welearn_database/alembic/env.py b/welearn_database/alembic/env.py
index 6e2a72b..285a308 100644
--- a/welearn_database/alembic/env.py
+++ b/welearn_database/alembic/env.py
@@ -1,3 +1,4 @@
+import os
 from logging.config import fileConfig
 
 from alembic import context
@@ -75,6 +76,18 @@ def run_migrations_online() -> None:
     load_dotenv()
     connectable = create_sqlalchemy_engine()
 
+    # Announce the migration target, guessed from the database host name.
+    # PG_HOST may be unset, so default to "" rather than calling .lower() on None.
+    pg_host = os.getenv("PG_HOST", "").lower()
+    if "prod" in pg_host:
+        print("Connecting to production database for migrations!")
+        # Pause so the operator must confirm before production is touched.
+        input("Press Enter to continue...")
+    elif "dev" in pg_host:
+        print("Connecting to development database for migrations!")
+    else:
+        print("Connecting to unknown database for migrations!")
+
     with connectable.connect() as connection:
         context.configure(
             connection=connection,
diff --git a/welearn_database/alembic/versions/2ad4895b2674_data_collection.py b/welearn_database/alembic/versions/2ad4895b2674_data_collection.py
new file mode 100644
index 0000000..698fa3f
--- /dev/null
+++ b/welearn_database/alembic/versions/2ad4895b2674_data_collection.py
@@ -0,0 +1,118 @@
+"""data collection
+
+Revision ID: 2ad4895b2674
+Revises: 068312e7800c
+Create Date: 2026-01-16 15:55:41.447852
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = "2ad4895b2674"
+down_revision: Union[str, None] = "068312e7800c"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the campaign table and rework chat_message / returned_document."""
+    op.create_table(
+        "data_collection_campaign_management",
+        sa.Column(
+            "id", sa.Uuid(), server_default=sa.func.gen_random_uuid(), nullable=False
+        ),
+        sa.Column("is_active", sa.Boolean(), nullable=False),
+        sa.Column("end_at", postgresql.TIMESTAMP(), nullable=False),
+        sa.Column(
+            "created_at",
+            postgresql.TIMESTAMP(),
+            # text() keeps NOW() a SQL expression; a plain str is quoted as a literal.
+            server_default=sa.text("NOW()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        schema="user_related",
+    )
+
+    # NOTE(review): the three chat_message columns below are NOT NULL without a
+    # server default, so this step presumably requires an empty chat_message
+    # table -- confirm, or backfill before enforcing NOT NULL.
+    op.add_column(
+        "chat_message",
+        sa.Column("role", sa.String(), nullable=False),
+        schema="user_related",
+    )
+    op.add_column(
+        "chat_message",
+        sa.Column("inferred_user_id", sa.Uuid(), nullable=False),
+        schema="user_related",
+    )
+    op.add_column(
+        "chat_message",
+        sa.Column("conversation_id", sa.Uuid(), nullable=False),
+        schema="user_related",
+    )
+    # Replace the user_id foreign key with one pointing at inferred_user.
+    op.drop_constraint(
+        op.f("message_user_id_fkey"),
+        "chat_message",
+        schema="user_related",
+        type_="foreignkey",
+    )
+    op.create_foreign_key(
+        "message_inferred_user_id_fkey",
+        "chat_message",
+        "inferred_user",
+        ["inferred_user_id"],
+        ["id"],
+        source_schema="user_related",
+        referent_schema="user_related",
+    )
+    op.drop_column("chat_message", "user_id", schema="user_related")
+    op.add_column(
+        "returned_document",
+        sa.Column(
+            "is_clicked",
+            sa.Boolean(),
+            # Gives rows that already exist a value for the new NOT NULL column.
+            server_default=sa.false(),
+            nullable=False,
+        ),
+        schema="user_related",
+    )
+
+
+def downgrade() -> None:
+    """Revert upgrade(): restore chat_message.user_id and drop the additions."""
+    op.drop_column("returned_document", "is_clicked", schema="user_related")
+    # NOTE(review): user_id comes back NOT NULL with no default; this presumably
+    # only works on an empty chat_message table -- confirm before downgrading.
+    op.add_column(
+        "chat_message",
+        sa.Column("user_id", sa.Uuid(), nullable=False),
+        schema="user_related",
+    )
+    op.drop_constraint(
+        "message_inferred_user_id_fkey",
+        "chat_message",
+        schema="user_related",
+        type_="foreignkey",
+    )
+    op.create_foreign_key(
+        op.f("message_user_id_fkey"),
+        "chat_message",
+        "user_profile",
+        ["user_id"],
+        ["id"],
+        source_schema="user_related",
+        referent_schema="user_related",
+    )
+    op.drop_column("chat_message", "conversation_id", schema="user_related")
+    op.drop_column("chat_message", "inferred_user_id", schema="user_related")
+    op.drop_column("chat_message", "role", schema="user_related")
+    op.drop_table("data_collection_campaign_management", schema="user_related")
diff --git a/welearn_database/data/models/user_related.py b/welearn_database/data/models/user_related.py
index 3532a81..9c3af47 100644
--- a/welearn_database/data/models/user_related.py
+++ b/welearn_database/data/models/user_related.py
@@ -85,11 +85,16 @@ class ChatMessage(Base):
     id: Mapped[UUID] = mapped_column(
         types.Uuid, primary_key=True, nullable=False, server_default="gen_random_uuid()"
     )
-    user_id = mapped_column(
+    inferred_user_id = mapped_column(
         types.Uuid,
-        ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.user_profile.id"),
+        ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.inferred_user.id"),
         nullable=False,
     )
+    conversation_id = mapped_column(
+        types.Uuid,
+        nullable=False,
+    )
+    role: Mapped[str]
     textual_content: Mapped[str]
 
     created_at: Mapped[datetime] = mapped_column(
@@ -105,7 +110,7 @@ class ChatMessage(Base):
         server_default="NOW()",
         onupdate=func.localtimestamp(),
     )
-    user: Mapped["UserProfile"] = relationship()
+    inferred_user: Mapped["InferredUser"] = relationship()
 
 
 class ReturnedDocument(Base):
@@ -128,6 +133,7 @@ class ReturnedDocument(Base):
         ),
         nullable=False,
     )
+    is_clicked: Mapped[bool] = mapped_column(default=False)
 
     welearn_document: Mapped["WeLearnDocument"] = relationship()
     chat_message: Mapped["ChatMessage"] = relationship()
@@ -158,6 +164,23 @@ class APIKeyManagement(Base):
     )
 
 
+class DataCollectionCampaignManagement(Base):
+    __tablename__ = "data_collection_campaign_management"
+    __table_args__ = {"schema": DbSchemaEnum.USER_RELATED.value}
+
+    id: Mapped[UUID] = mapped_column(
+        types.Uuid, primary_key=True, nullable=False, server_default="gen_random_uuid()"
+    )
+    is_active: Mapped[bool]
+    end_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=False), nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=False),
+        nullable=False,
+        default=func.localtimestamp(),
+        server_default="NOW()",
+    )
+
+
 class Session(Base):
     __tablename__ = "session"
     __table_args__ = {"schema": "user_related"}