diff --git a/warehouse/dbt_project.yml b/warehouse/dbt_project.yml
index b4ca39a..8ca4fe4 100644
--- a/warehouse/dbt_project.yml
+++ b/warehouse/dbt_project.yml
@@ -31,5 +31,19 @@ models:
   staging:
     +schema: staging
 
+tests:
+  data:
+    tests_high_prior:
+      severity: error
+      error_if: "!=0"
+    tests_medium_prior:
+      severity: error
+      error_if: ">10000"
+      warn_if: ">100"
+    tests_low_prior:
+      severity: warn
+      error_if: ">100"
+      warn_if: "!=0"
+
 vars:
   sandbox_ids: ['ac06b512-0726-4c7c-b143-2a4a0a3c4467', 'dd63b8b4-8872-4fed-a33a-12a22a3fa6f2', '4866ebb1-39f2-4656-a556-a822ff85ea5c', '542a7c6b-b18d-4152-abae-f43ea445366f', '2db3f19e-3322-43d3-95f8-df12c7e8b9a7', 'a7f60032-72fc-41a8-bb41-ada0944fb512', 'fb0872cd-85ff-47dd-bdb5-2c8dddfdb916', 'bd4d7747-d70b-4f63-aa88-fd60d8008381']
diff --git a/warehouse/tests/tests_low_prior/llm_io_operations_duration.sql b/warehouse/tests/tests_low_prior/llm_io_operations_duration.sql
new file mode 100644
index 0000000..e5e0a51
--- /dev/null
+++ b/warehouse/tests/tests_low_prior/llm_io_operations_duration.sql
@@ -0,0 +1,33 @@
+-- IO operation durations for analytical purposes. For now, this does not require any alarms
+with openai_stats as (
+    select *,
+        extract(epoch from (requested_at - created_at)) as db_write_duration,
+        extract(epoch from (responded_at - requested_at)) as openai_duration
+    from {{ ref('stg_llm__open_ai_completion_status') }}
+)
+select
+    action,
+    entity,
+    model,
+    -- Database write duration percentiles
+    (percentile_cont(0.50) within group (order by db_write_duration))::decimal(8, 2) as db_write_p50,
+    (percentile_cont(0.95) within group (order by db_write_duration))::decimal(8, 2) as db_write_p95,
+    (percentile_cont(0.99) within group (order by db_write_duration))::decimal(8, 2) as db_write_p99,
+    -- OpenAI request duration percentiles
+    (percentile_cont(0.50) within group (order by openai_duration))::decimal(8, 2) as openai_p50,
+    (percentile_cont(0.95) within group (order by openai_duration))::decimal(8, 2) as openai_p95,
+    (percentile_cont(0.99) within group (order by openai_duration))::decimal(8, 2) as openai_p99,
+    -- OpenAI consumed tokens percentiles
+    (percentile_cont(0.50) within group (order by consumed_tokens))::int as consumed_tokens_p50,
+    (percentile_cont(0.95) within group (order by consumed_tokens))::int as consumed_tokens_p95,
+    (percentile_cont(0.99) within group (order by consumed_tokens))::int as consumed_tokens_p99,
+    -- OpenAI produced tokens percentiles
+    (percentile_cont(0.50) within group (order by produced_tokens))::int as produced_tokens_p50,
+    (percentile_cont(0.95) within group (order by produced_tokens))::int as produced_tokens_p95,
+    (percentile_cont(0.99) within group (order by produced_tokens))::int as produced_tokens_p99
+from
+    openai_stats
+group by
+    action,
+    entity,
+    model
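
A note for reviewers on how the error_if / warn_if thresholds above are applied: dbt runs each singular test, counts the rows it returns, and compares that count against the configured conditions, so a test flags problems by returning rows. As a minimal sketch, a tests_high_prior check under error_if: "!=0" could look like the following (the file name and the null check are hypothetical, for illustration only):

-- warehouse/tests/tests_high_prior/llm_completion_missing_model.sql (hypothetical)
-- With severity: error and error_if: "!=0", any returned row fails the run
select *
from {{ ref('stg_llm__open_ai_completion_status') }}
where model is null
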
diff --git a/warehouse/tests/tests_low_prior/llm_open_ai_request_duration.sql b/warehouse/tests/tests_low_prior/llm_open_ai_request_duration.sql
new file mode 100644
index 0000000..649d1f0
--- /dev/null
+++ b/warehouse/tests/tests_low_prior/llm_open_ai_request_duration.sql
@@ -0,0 +1,22 @@
+-- OpenAI request duration histogram for analytical purposes. For now, this does not require any alarms
+select
+    width_bucket(
+        extract(
+            epoch
+            from
+                responded_at - requested_at
+        ),
+        0,
+        60,
+        60
+    ) as duration_in_seconds,
+    count(*)
+from
+    {{ ref('stg_llm__open_ai_completion_status') }}
+where
+    requested_at is not null
+    and responded_at is not null
+group by
+    duration_in_seconds
+order by
+    duration_in_seconds
diff --git a/warehouse/tests/tests_low_prior/llm_open_ai_token_usage.sql b/warehouse/tests/tests_low_prior/llm_open_ai_token_usage.sql
new file mode 100644
index 0000000..f87b46f
--- /dev/null
+++ b/warehouse/tests/tests_low_prior/llm_open_ai_token_usage.sql
@@ -0,0 +1,13 @@
+-- OpenAI total token usage histogram for analytical purposes. For now, this does not require any alarms
+select
+    width_bucket(total_tokens, 0, 4000, 10) * 400 as total_tokens_bucket,
+    count(*)
+from
+    {{ ref('stg_llm__open_ai_completion_status') }}
+where
+    total_tokens is not null
+    and "model" = 'gpt-3.5-turbo'
+group by
+    total_tokens_bucket
+order by
+    total_tokens_bucket
diff --git a/warehouse/tests/tests_medium_prior/llm_window_size_threshold.sql b/warehouse/tests/tests_medium_prior/llm_window_size_threshold.sql
new file mode 100644
index 0000000..346d04c
--- /dev/null
+++ b/warehouse/tests/tests_medium_prior/llm_window_size_threshold.sql
@@ -0,0 +1,46 @@
+-- Completions whose total tokens approach or exceed each model's context window size limit
+with context_window as (
+    select
+        *
+    from
+        (
+            values
+                ('gpt-3.5-turbo', 4000),
+                ('gpt-3.5-turbo-16k', 16000),
+                ('gpt-4', 8000),
+                ('gpt-4-32k', 32000)
+        ) as context_window("model", "window_size")
+),
+threshold as (
+    select
+        *
+    from
+        (
+            values
+                ('normal', 0, 0.50),
+                ('look_out_50%', 0.50, 0.75),
+                ('warning_75%', 0.75, 0.90),
+                ('severe_90%', 0.90, 0.95),
+                ('critical_95%', 0.95, 1),
+                ('exceeded_100%', 1, float8 '+infinity')
+        ) as threshold("label", "min", "max")
+)
+select
+    oac.model as model,
+    t.label as threshold,
+    count(*) as "count"
+from
+    {{ ref('stg_llm__open_ai_completion_status') }} oac
+    inner join context_window cw on cw.model = oac.model
+    inner join threshold t on oac.total_tokens::float / cw.window_size::float
+        between t.min and t.max - 0.00001
+where
+    oac.total_tokens is not null
+    and oac.model = cw.model
+    and t.label <> 'normal'
+group by
+    oac.model,
+    t.label,
+    t.min
+order by
+    t.min
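
A note on the - 0.00001 in the join above: it turns each threshold interval into a half-open range, so a usage ratio that lands exactly on a boundary is counted in exactly one bucket. As a standalone sanity check (plain PostgreSQL, illustrative values only, not part of this change), a gpt-3.5-turbo completion with 3800 total tokens (a ratio of exactly 0.95) falls into critical_95% rather than severe_90%:

select t.label
from (
    values
        ('normal', 0, 0.50),
        ('look_out_50%', 0.50, 0.75),
        ('warning_75%', 0.75, 0.90),
        ('severe_90%', 0.90, 0.95),
        ('critical_95%', 0.95, 1),
        ('exceeded_100%', 1, float8 '+infinity')
) as t("label", "min", "max")
-- 3800 / 4000 = 0.95: excluded from severe_90% (upper bound 0.95 - 0.00001),
-- included in critical_95% (lower bound 0.95, inclusive)
where 3800::float / 4000::float between t."min" and t."max" - 0.00001;
-- returns: critical_95%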