1+ import json
12from enum import Enum
23from typing import List , Optional , Union
34
1011 OpenAIEmbeddingModel ,
1112 SentenceTransformerEmbeddingModel ,
1213)
14+ from reranker import get_scores
15+
16+ # from langchain.schema import Document
17+ from splitter import get_split_documents_using_token_based
1318
1419load_dotenv ()
1520
@@ -35,6 +40,22 @@ class RequestSchemaForEmbeddings(BaseModel):
3540 base_url : Optional [str ] = None
3641
3742
class RequestSchemaForTextSplitter(BaseModel):
    """Request body for the ``/split_docs_based_on_tokens`` endpoint.

    Carries everything the token-based splitter needs: which model's
    tokenizer to measure lengths with, the documents to split, and the
    chunking parameters.
    """

    model: str  # model/tokenizer name passed through to the splitter
    documents: str  # JSON-encoded documents; decoded with json.loads by the endpoint
    chunk_size: int  # maximum size of each chunk (presumably in tokens — per splitter)
    chunk_overlap: int  # overlap between consecutive chunks
51+
class RequestSchemaForReRankers(BaseModel):
    """Request body for the ``/docs_reranking_scores`` endpoint.

    A query plus the candidate documents to be scored against it by the
    reranker.
    """

    query: str  # the query the documents are scored against
    documents: List[str]  # candidate documents to re-rank
58+
3859@app .get ("/" )
3960async def home ():
4061 """Returns a message"""
@@ -70,3 +91,18 @@ def generate(em_model, texts):
7091 elif type_model == EmbeddingModelType .OPENAI :
7192 embedding_model = OpenAIEmbeddingModel (model = name_model )
7293 return generate (em_model = embedding_model , texts = texts )
94+
95+
@app.post("/split_docs_based_on_tokens")
async def get_split_docs(item: RequestSchemaForTextSplitter):
    """Splits the documents using the model tokenization method"""
    # The request carries documents as a JSON string; decode before splitting.
    parsed_docs = json.loads(item.documents)
    return get_split_documents_using_token_based(
        model_name=item.model,
        documents=parsed_docs,
        chunk_size=item.chunk_size,
        chunk_overlap=item.chunk_overlap,
    )
103+
104+
@app.post("/docs_reranking_scores")
async def get_reranked_docs(item: RequestSchemaForReRankers):
    """Get reranked documents"""
    # Thin wrapper: scoring itself lives in the reranker module.
    query = item.query
    candidates = item.documents
    return get_scores(query, candidates)
0 commit comments