diff --git a/README.md b/README.md
index 14956fb..ef87f47 100644
--- a/README.md
+++ b/README.md
@@ -65,3 +65,11 @@
 - 취준생을 위한이면 취업에 필요한 단어를 추출해서 카테고리 만들기
 - ES 사용?
 - 추천하는 방식 → 단어 수 기반
+
+
+### 4. 남은 과업(wootaegyeoung)
+- 데이터베이스 연결
+  - 현재 mysql은 업로드 됨.
+  - mongoDB 생성후 이미지로 만들어서 배포.
+
+- 사용자기반 추천 api 코드 생성
\ No newline at end of file
diff --git a/fastapi-server/app/db.py b/fastapi-server/app/db.py
index a921a2d..7c3c3e8 100644
--- a/fastapi-server/app/db.py
+++ b/fastapi-server/app/db.py
@@ -1,25 +1,9 @@
-from sqlalchemy import create_engine
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker, scoped_session
+import os
 
-user_name=""
-password=""
-db_host="127.0.0.1"
-db_name=""
-
-DATABASE='mysql://'+user_name+':'+password+'@'+db_host+'/'+db_name+"?charset=utf8"
-
-engine = create_engine(
-    DATABASE,
-    encoding='utf8',
-    echo=True)
-
-sesstion=scoped_session(
-    sessionmaker(
-        autocommit=False,
-        autoflush=False,
-        bind=engine
-    ))
-
-Base = declarative_base()
-Base.query = sesstion.query_property()
+from motor.motor_asyncio import AsyncIOMotorClient
+
+# Overridable through the environment so deployments are not tied to localhost.
+MONGODB_URL = os.environ.get("MONGODB_URL", "mongodb://localhost:27017")
+
+client = AsyncIOMotorClient(MONGODB_URL)
+db = client.news_database
diff --git a/fastapi-server/app/dto/RecommendDTO.py b/fastapi-server/app/dto/RecommendDTO.py
new file mode 100644
index 0000000..604378d
--- /dev/null
+++ b/fastapi-server/app/dto/RecommendDTO.py
@@ -0,0 +1,17 @@
+from typing import Optional, List
+
+from pydantic import BaseModel
+
+class NewsRecommendationDTO_Req(BaseModel):
+    name: str
+    user_category: List[float]
+    num : int
+
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class recommend_news_similarity_InputData(BaseModel):
+    category_array: List[int]
+    top_n: int
diff --git a/fastapi-server/app/dto/userCategory.py b/fastapi-server/app/dto/userCategory.py
new file mode 100644
index 0000000..fea1f6a
--- /dev/null
+++ b/fastapi-server/app/dto/userCategory.py
@@ -0,0 +1,14 @@
+from typing import Optional, List
+
+from pydantic import BaseModel
+
+class UserUpdateDTO_Req(BaseModel):  # request body of the /news_user_update route
+    name: str
+    user_category: List[float]
+    new_category: List[float]
+
+    class Config:
+        arbitrary_types_allowed = True
+
+class UserUpdateDTO_Res(BaseModel):  # response body of the /news_user_update route
+    new_user_category: List[float]
diff --git a/fastapi-server/app/main.py b/fastapi-server/app/main.py
index 7d6d5a8..1edc98b 100644
--- a/fastapi-server/app/main.py
+++ b/fastapi-server/app/main.py
@@ -1,11 +1,25 @@
-from fastapi import FastAPI, Query
-from app.services.recommend import (
-    recommend_news,
-    recommend_popular_news,
-    recommend_based_on_demographics,
-    recommend_based_on_new_words,
+from random import random
+from typing import Dict, List, Optional
+
+import numpy as np
+import uvicorn
+from fastapi import FastAPI, Query, HTTPException
+from sklearn.metrics.pairwise import cosine_similarity
+
+from dto.RecommendDTO import NewsRecommendationDTO_Req, recommend_news_similarity_InputData
+from services.recommend_user_data import update_interest_vector, find_similar_items
+from dto.userCategory import UserUpdateDTO_Res, UserUpdateDTO_Req
+from services.recommend import (
+    recommend_news,
+    recommend_popular_news,
+    recommend_based_on_demographics,
+    recommend_based_on_new_words,
     recommend_for_job_seekers
 )
+from services.news_category_jsonStructure import *
+
+
+from services.searchNews import fetch_all_news
 
 app = FastAPI()
 
@@ -34,5 +48,67 @@ def job_seekers():
     recommendations = recommend_for_job_seekers()
     return {"recommendations": recommendations}
 
+
+
+
+# Return all news items.
+@app.get("/news", response_model=List[Dict])
+async def get_news():
+    return await fetch_all_news(news_item_helper)
+
+# Return the category vector of each news item.
+@app.get("/news_category", response_model=List[Dict])
+async def get_news_category():
+    return await fetch_all_news(news_Category_helper)
+
+# @app.get("/news_recommendation", response_model=List[Dict])
+# async def get_news_recommendation(data : NewsRecommendationDTO_Req):
+#     # TODO: which aspect of which user should be reflected is not decided yet; revisit this.
+#     result=fetch_all_news(news_Category_helper)
+#     # TODO: convert the result above into a list that contains only the vectors.
+#     similar_item_indices, similar_item_scores= find_similar_items(data.user_category, result, top_n=data.num)
+#     # TODO: look up the news items at similar_item_indices, in that order, and return them.
+#     return
+
+@app.post("/news/recommend/similarity")
+async def recommend_news_similarity(input_data: recommend_news_similarity_InputData):
+    """Rank news by cosine similarity to ``category_array`` and return the best ``top_n``."""
+    news_data = await fetch_all_news(news_Category_helper)
+    if not news_data or input_data.top_n <= 0:
+        return []  # nothing to rank; also keeps a negative top_n from slicing from the end
+    input_vector = np.array(input_data.category_array).reshape(1, -1)
+    news_vectors = np.array([news["category_array"] for news in news_data])
+    if input_vector.shape[1] != news_vectors.shape[1]:
+        raise HTTPException(status_code=422, detail="category_array has the wrong length")
+    similarities = cosine_similarity(input_vector, news_vectors).flatten()
+    ranked = sorted(zip(news_data, similarities), key=lambda pair: pair[1], reverse=True)
+    return [
+        {"news_id": news["news_id"], "similarity": float(score)}
+        for news, score in ranked[:input_data.top_n]
+    ]
+
+@app.get("/news/{news_id}/")
+async def get_news_by_id(news_id: str):
+    """Return the news item whose ``news_id`` matches, or respond with 404."""
+    # Linear scan over every stored item; fine for small collections only.
+    news_items = await fetch_all_news(news_item_helper)
+    match = next((news for news in news_items if news["news_id"] == news_id), None)
+    if match is None:
+        raise HTTPException(status_code=404, detail="News not found")
+    return match
+
+# Update the user's category vector from the current vector and a news id.
+# TODO: extracting the category vector from the news id still has to be added.
+@app.get("/news_user_update", response_model=UserUpdateDTO_Res)
+async def get_news_recommendation(data: UserUpdateDTO_Req):
+    """Return the user's interest vector updated with ``data.new_category``."""
+    # NOTE(review): this GET route reads a JSON body, which many clients and
+    # proxies do not send or forward; consider exposing it as POST as well.
+    updated = update_interest_vector(data.user_category, data.new_category)
+    # The helper returns a NumPy array; the response model needs plain floats.
+    return UserUpdateDTO_Res(new_user_category=[float(value) for value in updated])
+
+
+
 if __name__ == "__main__":
-    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run("main:app", host="127.0.0.1", port=8000, reload=True)
diff --git a/fastapi-server/app/models/category.py b/fastapi-server/app/models/category.py
new file mode 100644
index 0000000..7899c89
--- /dev/null
+++ b/fastapi-server/app/models/category.py
@@ -0,0 +1,8 @@
+from typing import List
+
+from pydantic import BaseModel
+
+
+class NewsItemCategory(BaseModel):
+    news_id: str
+    category_array: List[int]
\ No newline at end of file
diff --git a/fastapi-server/app/models/news.py b/fastapi-server/app/models/news.py
new file mode 100644
index 0000000..e0323a4
--- /dev/null
+++ b/fastapi-server/app/models/news.py
@@ -0,0 +1,20 @@
+from datetime import datetime
+from typing import List, Optional
+
+from pydantic import BaseModel
+
+class NewsItem(BaseModel):
+    news_id: str
+    title: str
+    content: str
+    hilight: str
+    published_at: datetime
+    enveloped_at: datetime
+    dateline: datetime
+    provider: str
+    category: List[str]
+    category_incident: List[str]
+    byline: str
+    provider_link_page: str
+    printing_page: Optional[str] = None
+
diff --git a/fastapi-server/app/services/news_category_jsonStructure.py b/fastapi-server/app/services/news_category_jsonStructure.py
new file mode 100644
index 0000000..06b827b
--- /dev/null
+++ b/fastapi-server/app/services/news_category_jsonStructure.py
@@ -0,0 +1,46 @@
+from typing import List, Dict
+
+from pydantic import BaseModel
+
+# Position of each supported category label inside a category vector.
+CATEGORY_INDEX = {
+    "IT_과학>과학": 0,
+    "IT_과학>보안": 1,
+    "IT_과학>모바일": 2,
+    "IT_과학>콘텐츠": 3,
+    "IT_과학>인터넷_SNS": 4,
+    "IT_과학>IT_과학일반": 5
+}
+
+def category_to_array(categories: List[str]) -> List[int]:
+    """Return a 0/1 vector with a 1 at the index of each known category; unknown labels are ignored."""
+    array = [0] * len(CATEGORY_INDEX)
+    for category in categories:
+        if category in CATEGORY_INDEX:
+            array[CATEGORY_INDEX[category]] = 1
+    return array
+
+def news_Category_helper(news: Dict) -> Dict:
+    """Project a raw news document onto its ``news_id`` and category vector."""
+    return {
+        "news_id": news.get("news_id", ""),
+        "category_array": category_to_array(news.get("category", []))
+    }
+
+def news_item_helper(news_item) -> dict:
+    """Copy the known news fields out of a raw document, filling in defaults for missing keys."""
+    return {
+        "news_id": news_item.get("news_id", ""),
+        "title": news_item.get("title", ""),
+        "content": news_item.get("content", ""),
+        "hilight": news_item.get("hilight", ""),
+        "published_at": news_item.get("published_at", ""),
+        "enveloped_at": news_item.get("enveloped_at", ""),
+        "dateline": news_item.get("dateline", ""),
+        "provider": news_item.get("provider", ""),
+        "category": news_item.get("category", []),
+        "category_incident": news_item.get("category_incident", []),
+        "byline": news_item.get("byline", ""),
+        "provider_link_page": news_item.get("provider_link_page", ""),
+        "printing_page": news_item.get("printing_page", ""),
+    }
diff --git a/fastapi-server/app/services/recommend.py b/fastapi-server/app/services/recommend.py
index 2e155d9..5c31c45 100644
--- a/fastapi-server/app/services/recommend.py
+++ b/fastapi-server/app/services/recommend.py
@@ -1,5 +1,5 @@
 from elasticsearch import Elasticsearch
-from app.config import ELASTICSEARCH_HOST
+from config import ELASTICSEARCH_HOST
 
 es = Elasticsearch([ELASTICSEARCH_HOST])
 
diff --git a/fastapi-server/app/services/recommend_user_data.py b/fastapi-server/app/services/recommend_user_data.py
new file mode 100644
index 0000000..f9e615a
--- /dev/null
+++ b/fastapi-server/app/services/recommend_user_data.py
@@ -0,0 +1,93 @@
+"""Utilities for updating a user's interest vector and finding similar items."""
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+
+
+def update_interest_vector(interest_vector, new_interests, increment=0.1, decrement=0.1):
+    """Return an updated copy of a user's per-category interest vector.
+
+    Entries flagged 1 in ``new_interests`` are raised by ``increment`` and
+    capped at 1.0. Entries flagged 0 are lowered by ``decrement`` (floored at
+    0) and then squared. Entries with any other flag are only capped at 1.0.
+
+    Args:
+        interest_vector: Current interest scores, one per category.
+        new_interests: Flags with the same shape as ``interest_vector``.
+        increment: Amount added to entries flagged 1.
+        decrement: Amount subtracted from entries flagged 0 before squaring.
+
+    Returns:
+        A new ``numpy.ndarray`` of floats; the inputs are not modified.
+
+    Raises:
+        ValueError: If the two vectors differ in shape.
+
+    Example:
+        ``update_interest_vector([1.0, 0.5, 0.8], [0, 1, 0])`` gives roughly
+        ``[0.81, 0.6, 0.49]``; applying ``[1, 0, 1]`` to that result gives
+        roughly ``[0.91, 0.25, 0.59]``.
+    """
+    scores = np.array(interest_vector, dtype=float)  # copy: never mutate the caller's data
+    flags = np.asarray(new_interests)
+    if scores.shape != flags.shape:
+        raise ValueError("interest_vector and new_interests must have the same shape")
+
+    boosted = np.minimum(np.where(flags == 1, scores + increment, scores), 1.0)
+    # Floor at zero before squaring: a score below ``decrement`` would otherwise
+    # go negative and the square would push it back up.
+    decayed = np.square(np.maximum(boosted - decrement, 0.0))
+    return np.where(flags == 0, decayed, boosted)
+
+
+def combine_vectors(keyword_vector, field_vector, media_vector):
+    """Concatenate the keyword, field and media vectors into one feature vector.
+
+    NOTE(review): an earlier version also computed the element-wise square of
+    each vector but never used the results; that dead code was removed.
+    Confirm whether squared features were intended.
+    """
+    return np.concatenate((keyword_vector, field_vector, media_vector))
+
+
+def find_similar_items(target_item_vectors, all_item_vectors, top_n=5):
+    """Find the items whose combined vectors are most cosine-similar to the target.
+
+    Args:
+        target_item_vectors: ``(keyword, field, media)`` vectors of the target.
+        all_item_vectors: Sequence of ``(keyword, field, media)`` tuples.
+        top_n: Maximum number of neighbours to return.
+
+    Returns:
+        ``(indices, scores)``: positions in ``all_item_vectors`` ordered from
+        most to least similar, and the matching ``1 - cosine distance`` scores.
+        Both are empty when there are no items or ``top_n`` is not positive.
+    """
+    combined_items = [combine_vectors(*vectors) for vectors in all_item_vectors]
+    # NearestNeighbors raises if asked for more neighbours than fitted samples.
+    n_neighbors = min(top_n, len(combined_items))
+    if n_neighbors <= 0:
+        return np.array([], dtype=int), np.array([], dtype=float)
+
+    combined_target = combine_vectors(*target_item_vectors)
+    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto', metric='cosine').fit(combined_items)
+    distances, indices = nbrs.kneighbors([combined_target])
+    return indices[0], 1 - distances[0]
+
+
+# ------ Usage example: combine_vectors / find_similar_items
+# target_keyword_vector = np.random.rand(5)
+# target_field_vector = np.random.rand(3)
+# target_media_vector = np.random.rand(4)
+#
+# all_item_vectors = [
+#     (np.random.rand(5), np.random.rand(3), np.random.rand(4)) for _ in range(1000)
+# ]
+#
+# similar_item_indices, similar_item_scores = find_similar_items(
+#     (target_keyword_vector, target_field_vector, target_media_vector),
+#     all_item_vectors,
+#     top_n=5
+# )
+#
+# print("Similar item indices (most similar first):", similar_item_indices)
+# print("Similarity scores of the similar items:", similar_item_scores)
diff --git a/fastapi-server/app/services/searchNews.py b/fastapi-server/app/services/searchNews.py
new file mode 100644
index 0000000..2920ec1
--- /dev/null
+++ b/fastapi-server/app/services/searchNews.py
@@ -0,0 +1,30 @@
+"""Helpers for reading news documents from MongoDB."""
+from typing import List, Dict
+
+from fastapi import HTTPException
+
+from db import db
+
+
+async def fetch_all_news(helper_func) -> List[Dict]:
+    """Load every stored news item and convert each one with ``helper_func``.
+
+    Items are read from the ``"documents"`` list of each document in
+    ``news_collection``; documents without that key contribute nothing.
+
+    Raises:
+        fastapi.HTTPException: 404 when the collection holds no documents.
+    """
+    news_documents = await db.news_collection.find().to_list(None)
+    if not news_documents:
+        raise HTTPException(status_code=404, detail="News items not found")
+    return [helper_func(news) for doc in news_documents for news in doc.get("documents", [])]
+
+
+async def fetch_find_news(helper_func, ars) -> List[Dict]:
+    """Return the same result as :func:`fetch_all_news`.
+
+    TODO: ``ars`` is accepted but not used yet (presumably a search filter;
+    confirm the intended query semantics before wiring it in).
+    """
+    return await fetch_all_news(helper_func)