Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,11 @@
- 취준생을 위한 것이라면 취업에 필요한 단어를 추출해서 카테고리 만들기
- ES 사용?
- 추천하는 방식 → 단어 수 기반


### 4. 남은 과업(wootaegyeoung)
- 데이터베이스 연결
- 현재 mysql은 업로드 됨.
- mongoDB 생성후 이미지로 만들어서 배포.

- 사용자기반 추천 api 코드 생성
27 changes: 3 additions & 24 deletions fastapi-server/app/db.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,4 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session
from motor.motor_asyncio import AsyncIOMotorClient

user_name=""
password=""
db_host="127.0.0.1"
db_name=""

DATABASE='mysql://'+user_name+':'+password+'@'+db_host+'/'+db_name+"?charset=utf8"

engine = create_engine(
DATABASE,
encoding='utf8',
echo=True)

sesstion=scoped_session(
sessionmaker(
autocommit=False,
autoflush=False,
bind=engine
))

Base = declarative_base()
Base.query = sesstion.query_property()
# Async MongoDB client (motor) used by the news endpoints.
# NOTE(review): connection string is hard-coded to localhost — move it to
# configuration/environment before deploying.
client = AsyncIOMotorClient('mongodb://localhost:27017')
db = client.news_database
17 changes: 17 additions & 0 deletions fastapi-server/app/dto/RecommendDTO.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Optional, List

from pydantic import BaseModel

class NewsRecommendationDTO_Req(BaseModel):
    """Request body for user-based news recommendation."""

    name: str
    user_category: List[float]  # user's per-category interest weights
    num: int  # number of recommendations requested (was `num : int`, PEP8 fix)

    class Config:
        # allow non-pydantic types (e.g. numpy arrays) in model fields
        arbitrary_types_allowed = True


class recommend_news_similarity_InputData(BaseModel):
    """Request body for POST /news/recommend/similarity."""

    category_array: List[int]  # one-hot category vector to compare against
    top_n: int  # how many most-similar news items to return
14 changes: 14 additions & 0 deletions fastapi-server/app/dto/userCategory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from typing import Optional, List

from pydantic import BaseModel

class UserUpdateDTO_Req(BaseModel):
    """Request body for updating a user's category-interest vector."""

    name: str
    user_category: List[float]  # current interest weights
    new_category: List[float]   # 0/1 flags for newly consumed categories

    class Config:
        # allow non-pydantic types (e.g. numpy arrays) in model fields
        arbitrary_types_allowed = True

class UserUpdateDTO_Res(BaseModel):
    """Response body: the user's updated category-interest vector."""

    new_user_category: List[float]
90 changes: 83 additions & 7 deletions fastapi-server/app/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
from fastapi import FastAPI, Query
from app.services.recommend import (
recommend_news,
recommend_popular_news,
recommend_based_on_demographics,
recommend_based_on_new_words,
from random import random
from typing import Optional

import numpy as np
import uvicorn
from fastapi import FastAPI, Query, HTTPException
from sklearn.metrics.pairwise import cosine_similarity

from dto.RecommendDTO import NewsRecommendationDTO_Req, recommend_news_similarity_InputData
from services.recommend_user_data import update_interest_vector, find_similar_items
from dto.userCategory import UserUpdateDTO_Res, UserUpdateDTO_Req
from services.recommend import (
recommend_news,
recommend_popular_news,
recommend_based_on_demographics,
recommend_based_on_new_words,
recommend_for_job_seekers
)
from services.news_category_jsonStructure import *


from services.searchNews import fetch_all_news

app = FastAPI()

Expand Down Expand Up @@ -34,5 +48,67 @@ def job_seekers():
recommendations = recommend_for_job_seekers()
return {"recommendations": recommendations}




# Return every stored news document, normalized to the API response shape.
@app.get("/news", response_model=List[Dict])
async def get_news():
    news_items = await fetch_all_news(news_item_helper)
    return news_items

# Return each news item's id with its one-hot category vector.
@app.get("/news_category", response_model=List[Dict])
async def get_news_category():
    category_rows = await fetch_all_news(news_Category_helper)
    return category_rows

# @app.get("/news_recommendation", response_model=List[Dict])
# async def get_news_recommendation(data : NewsRecommendationDTO_Req):
# #할일 : 현재 어떤 사용자의 어떤 부분을 반영할지를 정하지 않음. 이부분을 다시 반영해야할듯.
#
# result=fetch_all_news(news_Category_helper)
# #할일 : 위의 결과를 처리해서 벡터만 있는 list로 변환해야함.
#
# similar_item_indices, similar_item_scores= find_similar_items(data.user_category, result, top_n=data.num)
#
# # 할일 : similar_item_indices로 뉴스 순서에 대한 검색을 진행해서 검색된 결과를 반환.
#
# return

@app.post("/news/recommend/similarity")
async def recommend_news_similarity(input_data: recommend_news_similarity_InputData):
    """Rank all news by cosine similarity to the given category vector.

    Returns the top_n news ids with their similarity scores, most similar first.
    Raises 404 when there is no news data to compare against.
    """
    news_data = await fetch_all_news(news_Category_helper)
    if not news_data:
        # cosine_similarity raises on an empty matrix; fail with an explicit 404
        raise HTTPException(status_code=404, detail="News items not found")
    input_vector = np.array(input_data.category_array).reshape(1, -1)
    news_vectors = np.array([news['category_array'] for news in news_data])
    similarities = cosine_similarity(input_vector, news_vectors).flatten()
    ranked = sorted(zip(news_data, similarities), key=lambda x: x[1], reverse=True)
    return [
        {
            "news_id": item["news_id"],
            # cast so the JSON encoder never sees a numpy scalar
            "similarity": float(score),
        }
        for item, score in ranked[:input_data.top_n]
    ]

@app.get("/news/{news_id}/")
async def get_news_by_id(news_id: str):
    """Look up a single news item by its id; 404 when absent."""
    all_items = await fetch_all_news(news_item_helper)
    match = next((item for item in all_items if item["news_id"] == news_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="News not found")
    return match

# Update a user's category-interest vector from newly consumed categories.
# TODO: extracting the category vector from a news id still needs to be added.
@app.get("/news_user_update", response_model=UserUpdateDTO_Res)
async def get_news_recommendation(data: UserUpdateDTO_Req):
    # NOTE(review): a GET route with a Pydantic request body is unconventional —
    # FastAPI will expect a body here; this likely should be POST. Confirm with
    # the API consumers before changing the method.
    response_data = UserUpdateDTO_Res(
        new_user_category=update_interest_vector(data.user_category, data.new_category)
    )
    return response_data





if __name__ == "__main__":
    # The previous second uvicorn.run("main:app", host="127.0.0.1", ...) call
    # was unreachable dead code: uvicorn.run blocks until server shutdown.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
8 changes: 8 additions & 0 deletions fastapi-server/app/models/category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import List

from pydantic import BaseModel


class NewsItemCategory(BaseModel):
    """A news item's id paired with its one-hot category vector."""

    news_id: str
    category_array: List[int]
20 changes: 20 additions & 0 deletions fastapi-server/app/models/news.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel

class NewsItem(BaseModel):
    """Full news document as stored/served by the API."""

    news_id: str
    title: str
    content: str
    # NOTE(review): "hilight" looks like a typo for "highlight", but it must
    # match the stored document's field name — confirm before renaming.
    hilight: str
    published_at: datetime
    enveloped_at: datetime
    dateline: datetime
    provider: str
    category: List[str]
    category_incident: List[str]
    byline: str
    provider_link_page: str
    printing_page: Optional[str] = None

42 changes: 42 additions & 0 deletions fastapi-server/app/services/news_category_jsonStructure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import List, Dict

from pydantic import BaseModel

# Maps a category label to its slot in the one-hot vector.
CATEGORY_INDEX = {
    "IT_과학>과학": 0,
    "IT_과학>보안": 1,
    "IT_과학>모바일": 2,
    "IT_과학>콘텐츠": 3,
    "IT_과학>인터넷_SNS": 4,
    "IT_과학>IT_과학일반": 5
}

def category_to_array(categories: List[str]) -> List[int]:
    """One-hot encode *categories* against CATEGORY_INDEX; unknown labels are ignored."""
    hit_slots = {CATEGORY_INDEX[label] for label in categories if label in CATEGORY_INDEX}
    return [1 if slot in hit_slots else 0 for slot in range(len(CATEGORY_INDEX))]

def news_Category_helper(news: Dict) -> Dict:
    """Project a raw news document to its id plus one-hot category vector."""
    news_id = news.get("news_id", "")
    vector = category_to_array(news.get("category", []))
    return {"news_id": news_id, "category_array": vector}

def news_item_helper(news_item) -> dict:
    """Normalize a raw news document into the API response shape.

    Missing fields default to "" (or [] for the list-valued category fields);
    keys not in the schema are dropped. Key order matches the API contract.
    """
    field_order = (
        "news_id", "title", "content", "hilight", "published_at",
        "enveloped_at", "dateline", "provider", "category",
        "category_incident", "byline", "provider_link_page", "printing_page",
    )
    list_fields = {"category", "category_incident"}
    # `[]` inside the comprehension yields a fresh list per call, matching the
    # original behavior (no shared mutable default).
    return {
        key: news_item.get(key, [] if key in list_fields else "")
        for key in field_order
    }
2 changes: 1 addition & 1 deletion fastapi-server/app/services/recommend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from elasticsearch import Elasticsearch
from app.config import ELASTICSEARCH_HOST
from config import ELASTICSEARCH_HOST

es = Elasticsearch([ELASTICSEARCH_HOST])

Expand Down
83 changes: 83 additions & 0 deletions fastapi-server/app/services/recommend_user_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# 유저의 관심 카테고리 업데이트
import numpy as np
from sklearn.neighbors import NearestNeighbors

#update interest Category
#update interest Category
def update_interest_vector(interest_vector, new_interests, increment=0.1, decrement=0.1):
    """Return an updated copy of a user's category-interest vector.

    Categories flagged 1 in *new_interests* are boosted by *increment* and
    clipped to 1.0; categories flagged 0 decay as (value - decrement) ** 2.
    Entries with any other flag value are left unchanged, matching the
    original element-wise loops.

    Fix: the previous version mutated the caller's array in place via
    ``interest_vector[i] += increment`` before ``np.minimum`` made a copy;
    this version works on a copy throughout and never mutates its arguments.
    """
    vec = np.array(interest_vector, dtype=float)  # defensive copy
    flags = np.asarray(new_interests)

    vec[flags == 1] += increment
    vec = np.minimum(vec, 1.0)  # interest is capped at 1.0

    decayed = flags == 0
    # squaring keeps the result non-negative even if value - decrement < 0
    vec[decayed] = (vec[decayed] - decrement) ** 2
    return vec

# user_based recommendation, content_based recommendation
# user_based recommendation, content_based recommendation
def combine_vectors(keyword_vector, field_vector, media_vector):
    """Concatenate the three profile vectors into one feature vector.

    Fix: the previous version also computed element-wise squares of each
    input (`np.square`) but never used them — dead code removed.
    """
    return np.concatenate((keyword_vector, field_vector, media_vector))

def find_similar_items(target_item_vectors, all_item_vectors, top_n=5):
    """Return indices and cosine-similarity scores of the top_n nearest items.

    Each element of *all_item_vectors* (and the target) is a tuple of the
    three profile vectors accepted by combine_vectors.
    """
    candidate_matrix = [combine_vectors(*vectors) for vectors in all_item_vectors]
    query_vector = combine_vectors(*target_item_vectors)

    model = NearestNeighbors(n_neighbors=top_n, algorithm='auto', metric='cosine')
    model.fit(candidate_matrix)
    distances, indices = model.kneighbors([query_vector])

    # cosine distance = 1 - cosine similarity
    return indices[0], 1 - distances[0]

# ------update_interest_vector 사용예시

# interest_vector = np.array([1.0, 0.5, 0.8])
# new_interests = np.array([0, 1, 0])
# interest_vector = update_interest_vector(interest_vector, new_interests)
# print(interest_vector) # [0.81, 0.6, 0.49]

# new_interests = np.array([1, 0, 1])
# interest_vector = update_interest_vector(interest_vector, new_interests)
# print(interest_vector) # [0.91, 0.25, 0.59]


#------combine_vectors, find_similar_items 사용예시
# target_keyword_vector = np.random.rand(5)
# target_field_vector = np.random.rand(3)
# target_media_vector = np.random.rand(4)

# all_item_vectors = [
# (np.random.rand(5), np.random.rand(3), np.random.rand(4)) for _ in range(1000)
# ]

# similar_item_indices, similar_item_scores = find_similar_items(
# (target_keyword_vector, target_field_vector, target_media_vector),
# all_item_vectors,
# top_n=5
# )

# print("유사한 아이템 인덱스 (유사도 순):", similar_item_indices)
# print("유사한 아이템들의 유사도 점수:", similar_item_scores)

# target_keyword_vector = np.random.rand(5)
# target_field_vector = np.random.rand(3)
# target_media_vector = np.random.rand(4)

# all_item_vectors = [
# (np.random.rand(5), np.random.rand(3), np.random.rand(4)) for _ in range(1000)
# ]

# similar_item_indices, similar_item_scores = find_similar_items(
# (target_keyword_vector, target_field_vector, target_media_vector),
# all_item_vectors,
# top_n=5
# )

# print("유사한 아이템 인덱스 (유사도 순):", similar_item_indices)
# print("유사한 아이템들의 유사도 점수:", similar_item_scores)


22 changes: 22 additions & 0 deletions fastapi-server/app/services/searchNews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from http.client import HTTPException  # noqa: F401 -- shadowed by the fastapi import below
from typing import List, Dict

from fastapi import HTTPException  # fastapi's HTTPException supports status_code/detail kwargs

from db import db


async def fetch_all_news(helper_func) -> List[Dict]:
    """Fetch every news entry from Mongo, mapped through *helper_func*.

    Each stored document holds a "documents" list of raw news items; the
    helper projects each item to the desired response shape.
    """
    news_collection = db.news_collection
    # to_list(None) loads the entire collection into memory at once
    news_documents = await news_collection.find().to_list(None)
    if news_documents:
        return [helper_func(news) for doc in news_documents for news in doc.get("documents", [])]
    else:
        # NOTE(review): HTTPException here is imported from http.client, whose
        # constructor takes no status_code/detail kwargs — this raise would
        # itself fail with TypeError. It should be fastapi.HTTPException.
        raise HTTPException(status_code=404, detail="News items not found")


async def fetch_find_news(helper_func, ars) -> List[Dict]:
    """Intended filtered variant of fetch_all_news.

    NOTE(review): the *ars* argument is currently unused — this body is an
    exact duplicate of fetch_all_news; the filtering still needs implementing.
    """
    news_collection = db.news_collection
    news_documents = await news_collection.find().to_list(None)
    if news_documents:
        return [helper_func(news) for doc in news_documents for news in doc.get("documents", [])]
    else:
        # NOTE(review): same http.client HTTPException problem as fetch_all_news —
        # this constructor call would raise TypeError; fastapi.HTTPException intended.
        raise HTTPException(status_code=404, detail="News items not found")