Skip to content

Commit 77b07bc

Browse files
committed
Merge branch 'master' into replace-scaled-clique-identifier-count
2 parents 90e65d2 + 6f42776 commit 77b07bc

File tree

3 files changed

+3770
-186
lines changed

3 files changed

+3770
-186
lines changed

api/server.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
* Matching names are returned first, followed by non-matching names
99
"""
1010
import json
11-
import logging, warnings
11+
import logging
12+
import time
13+
import warnings
1214
import os
1315
import re
1416
from typing import Dict, List, Union, Annotated, Optional
@@ -19,13 +21,14 @@
1921
from pydantic import BaseModel, conint, Field
2022
from starlette.middleware.cors import CORSMiddleware
2123

22-
from .apidocs import get_app_info, construct_open_api_schema
24+
from api.apidocs import get_app_info, construct_open_api_schema
2325

24-
LOGGER = logging.getLogger(__name__)
2526
SOLR_HOST = os.getenv("SOLR_HOST", "localhost")
2627
SOLR_PORT = os.getenv("SOLR_PORT", "8983")
2728

2829
app = FastAPI(**get_app_info())
30+
logger = logging.getLogger(__name__)
31+
logging.basicConfig(level=os.getenv("LOGLEVEL", logging.INFO))
2932

3033
app.add_middleware(
3134
CORSMiddleware,
@@ -63,7 +66,7 @@ async def status() -> Dict:
6366
'action': 'STATUS'
6467
})
6568
if response.status_code >= 300:
66-
LOGGER.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
69+
logger.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
6770
response.raise_for_status()
6871
result = response.json()
6972

@@ -125,7 +128,7 @@ async def reverse_lookup_get(
125128
)
126129
) -> Dict[str, Dict]:
127130
"""Returns a list of synonyms for a particular CURIE."""
128-
return await reverse_lookup(curies)
131+
return await curie_lookup(curies)
129132

130133

131134
@app.get(
@@ -135,14 +138,14 @@ async def reverse_lookup_get(
135138
response_model=Dict[str, Dict],
136139
tags=["lookup"],
137140
)
138-
async def lookup_names_get(
141+
async def synonyms_get(
139142
preferred_curies: List[str]= Query(
140143
example=["MONDO:0005737", "MONDO:0009757"],
141144
description="A list of CURIEs to look up synonyms for."
142145
)
143146
) -> Dict[str, Dict]:
144147
"""Returns a list of synonyms for a particular CURIE."""
145-
return await reverse_lookup(preferred_curies)
148+
return await curie_lookup(preferred_curies)
146149

147150

148151
@app.post(
@@ -159,7 +162,7 @@ async def lookup_names_post(
159162
}),
160163
) -> Dict[str, Dict]:
161164
"""Returns a list of synonyms for a particular CURIE."""
162-
return await reverse_lookup(request.curies)
165+
return await curie_lookup(request.curies)
163166

164167

165168
@app.post(
@@ -169,17 +172,18 @@ async def lookup_names_post(
169172
response_model=Dict[str, Dict],
170173
tags=["lookup"],
171174
)
172-
async def lookup_names_post(
175+
async def synonyms_post(
173176
request: SynonymsRequest = Body(..., example={
174177
"preferred_curies": ["MONDO:0005737", "MONDO:0009757"],
175178
}),
176179
) -> Dict[str, Dict]:
177180
"""Returns a list of synonyms for a particular CURIE."""
178-
return await reverse_lookup(request.preferred_curies)
181+
return await curie_lookup(request.preferred_curies)
179182

180183

181-
async def reverse_lookup(curies) -> Dict[str, Dict]:
184+
async def curie_lookup(curies) -> Dict[str, Dict]:
182185
"""Returns a list of synonyms for a particular CURIE."""
186+
time_start = time.time_ns()
183187
query = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
184188
curie_filter = " OR ".join(
185189
f"curie:\"{curie}\""
@@ -199,6 +203,10 @@ async def reverse_lookup(curies) -> Dict[str, Dict]:
199203
}
200204
for doc in response_json["response"]["docs"]:
201205
output[doc["curie"]] = doc
206+
time_end = time.time_ns()
207+
208+
logger.info(f"CURIE Lookup on {len(curies)} CURIEs {json.dumps(curies)} took {(time_end - time_start)/1_000_000:.2f}ms")
209+
202210
return output
203211

204212
class LookupResult(BaseModel):
@@ -351,6 +359,8 @@ async def lookup(string: str,
351359
will be returned, rather than filtering to concepts that are both PhenotypicFeature and Disease.
352360
"""
353361

362+
time_start = time.time_ns()
363+
354364
# First, we strip and lowercase the query since all our indexes are case-insensitive.
355365
string_lc = string.strip().lower()
356366

@@ -483,16 +493,18 @@ async def lookup(string: str,
483493
"fields": "*, score",
484494
"params": inner_params,
485495
}
486-
logging.debug(f"Query: {json.dumps(params, indent=2)}")
496+
logger.debug(f"Query: {json.dumps(params, indent=2)}")
487497

498+
time_solr_start = time.time_ns()
488499
query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
489500
async with httpx.AsyncClient(timeout=None) as client:
490501
response = await client.post(query_url, json=params)
491502
if response.status_code >= 300:
492-
LOGGER.error("Solr REST error: %s", response.text)
503+
logger.error("Solr REST error: %s", response.text)
493504
response.raise_for_status()
494505
response = response.json()
495-
logging.debug(f"Solr response: {json.dumps(response, indent=2)}")
506+
time_solr_end = time.time_ns()
507+
logger.debug(f"Solr response: {json.dumps(response, indent=2)}")
496508

497509
# Associate highlighting information with search results.
498510
highlighting_response = response.get("highlighting", {})
@@ -535,6 +547,12 @@ async def lookup(string: str,
535547
clique_identifier_count=doc.get("clique_identifier_count", 0),
536548
types=[f"biolink:{d}" for d in doc.get("types", [])]))
537549

550+
time_end = time.time_ns()
551+
logger.info(f"Lookup query to Solr for {json.dumps(string)} " +
552+
f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, only_taxa={only_taxa}) "
553+
f"took {(time_end - time_start)/1_000_000:.2f}ms (with {(time_solr_end - time_solr_start)/1_000_000:.2f}ms waiting for Solr)"
554+
)
555+
538556
return outputs
539557

540558
## BULK ENDPOINT
@@ -603,6 +621,7 @@ class NameResQuery(BaseModel):
603621
tags=["lookup"]
604622
)
605623
async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
624+
time_start = time.time_ns()
606625
result = {}
607626
for string in query.strings:
608627
result[string] = await lookup(
@@ -615,6 +634,9 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
615634
query.only_prefixes,
616635
query.exclude_prefixes,
617636
query.only_taxa)
637+
time_end = time.time_ns()
638+
logger.info(f"Bulk lookup query for {len(query.strings)} strings ({query}) took {(time_end - time_start)/1_000_000:.2f}ms")
639+
618640
return result
619641

620642

data-loading/setup-and-load-solr.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ curl -X POST -H 'Content-type:application/json' --data-binary '{
7373
"name":"names_exactish",
7474
"type":"exactish",
7575
"indexed":true,
76-
"stored":true,
76+
"stored":false,
7777
"multiValued":true
7878
},
7979
{

0 commit comments

Comments
 (0)