Skip to content

Commit 1389081

Browse files
Update test suite
1 parent 7e94591 commit 1389081

File tree

8 files changed

+218
-3
lines changed

8 files changed

+218
-3
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
content-type:
6+
- application/json
7+
user-agent:
8+
- opensearch-py/2.8.0 (Python 3.12.11)
9+
method: GET
10+
uri: http://localhost:9200/_cat/aliases?format=json
11+
response:
12+
body:
13+
string: '[{"alias":"all-current","index":"libguides-2025-12-11t16-36-09","filter":"-","routing.index":"-","routing.search":"-","is_write_index":"-"},{"alias":"libguides","index":"libguides-2025-12-11t16-36-09","filter":"-","routing.index":"-","routing.search":"-","is_write_index":"-"},{"alias":"all-current","index":"test-index-2025-12-11t16-58-08","filter":"-","routing.index":"-","routing.search":"-","is_write_index":"-"},{"alias":"test-index","index":"test-index-2025-12-11t16-58-08","filter":"-","routing.index":"-","routing.search":"-","is_write_index":"-"},{"alias":".kibana","index":".kibana_1","filter":"-","routing.index":"-","routing.search":"-","is_write_index":"-"}]'
14+
headers:
15+
content-length:
16+
- '671'
17+
content-type:
18+
- application/json; charset=UTF-8
19+
status:
20+
code: 200
21+
message: OK
22+
version: 1
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
interactions:
2+
- request:
3+
body: '{"update":{"_id":"i-am-not-found","_index":"test-index"}}
4+
5+
{"doc":{"timdex_record_id":"i-am-not-found","title":"Materials Science & Engineering
6+
(UPDATED)"}}
7+
8+
'
9+
headers:
10+
Content-Length:
11+
- '156'
12+
content-type:
13+
- application/json
14+
user-agent:
15+
- opensearch-py/2.8.0 (Python 3.12.11)
16+
method: POST
17+
uri: http://localhost:9200/_bulk
18+
response:
19+
body:
20+
string: '{"took":9,"errors":true,"items":[{"update":{"_index":"test-index-2025-12-11t16-58-08","_id":"i-am-not-found","status":404,"error":{"type":"document_missing_exception","reason":"[i-am-not-found]:
21+
document missing","index":"test-index-2025-12-11t16-58-08","shard":"0","index_uuid":"in04_JvQS5qqCvUXeZta_g"}}}]}'
22+
headers:
23+
content-length:
24+
- '308'
25+
content-type:
26+
- application/json; charset=UTF-8
27+
status:
28+
code: 200
29+
message: OK
30+
version: 1
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
interactions:
2+
- request:
3+
body: '{"update":{"_id":"libguides:guides-175846","_index":"test-index"}}
4+
5+
{"doc":{"timdex_record_id":"libguides:guides-175846","title":"Materials Science
6+
& Engineering (UPDATED)"}}
7+
8+
'
9+
headers:
10+
Content-Length:
11+
- '174'
12+
content-type:
13+
- application/json
14+
user-agent:
15+
- opensearch-py/2.8.0 (Python 3.12.11)
16+
method: POST
17+
uri: http://localhost:9200/_bulk
18+
response:
19+
body:
20+
string: '{"took":7,"errors":false,"items":[{"update":{"_index":"test-index-2025-12-11t16-58-08","_id":"libguides:guides-175846","_version":4,"result":"updated","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":7,"_primary_term":1,"status":200}}]}'
21+
headers:
22+
content-length:
23+
- '245'
24+
content-type:
25+
- application/json; charset=UTF-8
26+
status:
27+
code: 200
28+
message: OK
29+
version: 1

tests/test_cli.py

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from freezegun import freeze_time
66

77
from tim.cli import main
8-
from tim.errors import BulkIndexingError
8+
from tim.errors import BulkIndexingError, BulkOperationError
99

1010
from .conftest import EXIT_CODES, my_vcr
1111

@@ -274,6 +274,93 @@ def test_bulk_update_with_source_raise_bulk_indexing_error(
274274
)
275275

276276

@patch("tim.helpers.validate_bulk_cli_options")
@patch("tim.opensearch.bulk_update")
def test_bulk_update_embeddings_success(
    mock_bulk_update, mock_validate_bulk_cli_options, caplog, monkeypatch, runner
):
    """The command exits successfully and logs the summary from bulk_update."""
    # Make sure TIMDEX_OPENSEARCH_ENDPOINT is unset for this invocation.
    monkeypatch.delenv("TIMDEX_OPENSEARCH_ENDPOINT", raising=False)
    mock_bulk_update.return_value = {"updated": 1, "errors": 0, "total": 1}
    mock_validate_bulk_cli_options.return_value = "libguides"

    cli_args = [
        "bulk-update-embeddings",
        "--source",
        "libguides",
        "--run-date",
        "2025-05-07",
        "--run-id",
        "85cfe316-089c-4639-a5af-c861a7321493",
        "tests/fixtures/dataset",
    ]
    result = runner.invoke(main, cli_args)

    assert result.exit_code == EXIT_CODES["success"]
    # The mocked summary is expected to be serialized into the completion log line.
    expected_log = (
        f"Bulk update with embeddings complete: {json.dumps(mock_bulk_update())}"
    )
    assert expected_log in caplog.text
305+
306+
@patch("tim.helpers.validate_bulk_cli_options")
@patch("tim.opensearch.bulk_update")
def test_bulk_update_embeddings_raise_error(
    mock_bulk_update, mock_validate_bulk_cli_options, caplog, monkeypatch, runner
):
    """A summary that reports an error is still logged and exits successfully.

    Nothing is raised in this scenario: bulk_update returns a result whose
    "errors" count is non-zero, and the test checks that the command logs
    that summary and finishes with the success exit code.
    """
    # Make sure TIMDEX_OPENSEARCH_ENDPOINT is unset for this invocation.
    monkeypatch.delenv("TIMDEX_OPENSEARCH_ENDPOINT", raising=False)
    mock_bulk_update.return_value = {"updated": 0, "errors": 1, "total": 1}
    mock_validate_bulk_cli_options.return_value = "libguides"

    cli_args = [
        "bulk-update-embeddings",
        "--source",
        "libguides",
        "--run-date",
        "2025-05-07",
        "--run-id",
        "85cfe316-089c-4639-a5af-c861a7321493",
        "tests/fixtures/dataset",
    ]
    result = runner.invoke(main, cli_args)

    assert result.exit_code == EXIT_CODES["success"]
    expected_log = (
        f"Bulk update with embeddings complete: {json.dumps(mock_bulk_update())}"
    )
    assert expected_log in caplog.text
334+
335+
@patch("tim.helpers.validate_bulk_cli_options")
@patch("tim.opensearch.bulk_update")
def test_bulk_update_embeddings_raise_bulk_operation_error(
    mock_bulk_update, mock_validate_bulk_cli_options, caplog, monkeypatch, runner
):
    """A BulkOperationError from bulk_update is logged; the exit code is success."""
    # Make sure TIMDEX_OPENSEARCH_ENDPOINT is unset for this invocation.
    monkeypatch.delenv("TIMDEX_OPENSEARCH_ENDPOINT", raising=False)
    simulated_failure = BulkOperationError(
        action="update", record="alma:0", index="index", error="exception"
    )
    mock_bulk_update.side_effect = simulated_failure
    mock_validate_bulk_cli_options.return_value = "libguides"

    cli_args = [
        "bulk-update-embeddings",
        "--source",
        "libguides",
        "--run-date",
        "2025-05-07",
        "--run-id",
        "85cfe316-089c-4639-a5af-c861a7321493",
        "tests/fixtures/dataset",
    ]
    result = runner.invoke(main, cli_args)

    assert result.exit_code == EXIT_CODES["success"]
    assert "Bulk update with embeddings failed" in caplog.text
362+
363+
277364
@patch("tim.opensearch.create_index")
278365
@patch("tim.opensearch.promote_index")
279366
@patch("tim.opensearch.get_index_aliases")

tests/test_helpers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ def test_generate_bulk_actions_delete():
5151
}
5252

5353

def test_generate_bulk_actions_update():
    """The first "update" action carries the record under "doc", keyed by its id."""
    generated = helpers.generate_bulk_actions(
        "test-index", [{"timdex_record_id": "12345"}], "update"
    )
    expected_action = {
        "_op_type": "update",
        "_index": "test-index",
        "_id": "12345",
        "doc": {"timdex_record_id": "12345"},
    }
    assert next(generated) == expected_action
63+
64+
5465
def test_generate_bulk_actions_invalid_action_raises_error():
5566
records = [{"timdex_record_id": "12345", "other_fields": "some_data"}]
5667
actions = helpers.generate_bulk_actions("test-index", records, "wrong")

tests/test_opensearch.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from tim.config import PRIMARY_ALIAS
1010
from tim.errors import (
1111
AliasNotFoundError,
12+
BulkOperationError,
1213
IndexExistsError,
1314
IndexNotFoundError,
1415
)
@@ -532,3 +533,38 @@ def test_bulk_delete_logs_error_if_record_not_found(
532533
"Record to delete 'i-am-not-found' was not found in index 'test-index'."
533534
in caplog.text
534535
)
536+
537+
538+
# Open question: what happens when 'bulk_update_embeddings' targets a document that doesn't exist?
539+
# - Are all of these mocks or VCR cassettes needed to test that? (Probably not.)
540+
# TODO: add a fixture that reindexes a source into the mock.
541+
# TODO: if we run tda in fixtures/datasets, what is in it?
@my_vcr.use_cassette("opensearch/bulk_update_updates_records.yaml")
def test_bulk_update_updates_records(test_opensearch_client):
    """Updating one existing record yields a summary of one update and no errors."""
    record_update = {
        "timdex_record_id": "libguides:guides-175846",
        "title": "Materials Science & Engineering (UPDATED)",
    }
    summary = tim_os.bulk_update(
        test_opensearch_client, "test-index", iter([record_update])
    )
    assert summary == {
        "updated": 1,
        "errors": 0,
        "total": 1,
    }
555+
556+
@my_vcr.use_cassette(
    "opensearch/bulk_update_raises_bulk_operation_error_if_record_not_found.yaml"
)
def test_bulk_update_raises_bulk_operation_error_if_record_not_found(
    test_opensearch_client,
):
    """Updating an id that is not in the index raises BulkOperationError."""
    missing_record_update = {
        "timdex_record_id": "i-am-not-found",
        "title": "Materials Science & Engineering (UPDATED)",
    }
    pending_updates = iter([missing_record_update])
    with pytest.raises(BulkOperationError):
        tim_os.bulk_update(test_opensearch_client, "test-index", pending_updates)

tim/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from tim import errors, helpers
1111
from tim import opensearch as tim_os
1212
from tim.config import PRIMARY_ALIAS, VALID_SOURCES, configure_logger, configure_sentry
13-
from tim.errors import BulkIndexingError
13+
from tim.errors import BulkIndexingError, BulkOperationError
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -390,7 +390,7 @@ def bulk_update_embeddings(
390390

391391
try:
392392
update_results.update(tim_os.bulk_update(client, index, embeddings_to_index))
393-
except BulkIndexingError as exception:
393+
except BulkOperationError as exception:
394394
logger.info(f"Bulk update with embeddings failed: {exception}")
395395

396396
logger.info(f"Bulk update with embeddings complete: {json.dumps(update_results)}")

0 commit comments

Comments
 (0)