diff --git a/.gitignore b/.gitignore index 6f50c5abf8..106e6dddb1 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ auth/keycloak/config/*.log /services/net/image/keys /services/net/image/data +**/[Bb]in/ +**/[Oo]bj/ + # TLS certificates - managed separately, not in git openshift/kustomize/.tls-certs/*.crt openshift/kustomize/.tls-certs/*.key diff --git a/.vscode/launch.json b/.vscode/launch.json index 55e1c5dcdb..a92157fa1f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,8 +1,6 @@ { "version": "0.2.0", "configurations": [ - - { // Use IntelliSense to find out which attributes exist for C# debugging // Use hover for the description of the existing attributes @@ -105,6 +103,23 @@ "stopAtEntry": false, "envFile": "${workspaceFolder}/services/net/transcription/.env" }, + { + // Use IntelliSense to find out which attributes exist for C# debugging + // Use hover for the description of the existing attributes + // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md + "name": "Run Auto Clipper Service", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "build-auto-clipper", + // If you have changed target frameworks, make sure to update the program path. + "program": "${workspaceFolder}/services/net/auto-clipper/bin/Debug/net9.0/TNO.Services.AutoClipper.dll", + "args": [], + "cwd": "${workspaceFolder}/services/net/auto-clipper", + // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console + "console": "internalConsole", + "stopAtEntry": false, + "envFile": "${workspaceFolder}/services/net/auto-clipper/.env" + }, { // Use IntelliSense to find out which attributes exist for C# debugging // Use hover for the description of the existing attributes diff --git a/.vscode/settings.json b/.vscode/settings.json index d04a5fd689..2300553d3d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,6 +12,7 @@ "CHES", "datalabels", "formik", + "healthcheck", "Idir", "insertable", "Keycloak", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 96f51d018c..42ec776286 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -29,12 +29,7 @@ "label": "watch", "command": "dotnet", "type": "process", - "args": [ - "watch", - "run", - "--project", - "${workspaceFolder}/api/net/TNO.API.csproj" - ], + "args": ["watch", "run", "--project", "${workspaceFolder}/api/net/TNO.API.csproj"], "problemMatcher": "$msCompile" }, { @@ -213,7 +208,43 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/transcription/TNO.Services.Transcription.csproj" + "${workspaceFolder}/services/net/transcription/TNO.Services.Transcription.csproj" ], "problemMatcher": "$msCompile" }, + { + "label": "build-auto-clipper", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "${workspaceFolder}/services/net/auto-clipper/TNO.Services.AutoClipper.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "publish-auto-clipper", + "command": "dotnet", + "type": "process", + "args": [ + "publish", + "${workspaceFolder}/services/net/auto-clipper/TNO.Services.AutoClipper.csproj", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "watch-auto-clipper", + "command": "dotnet", + "type": "process", + "args": [ + "watch", + "run", + "--project", + "${workspaceFolder}/services/net/auto-clipper/TNO.Services.AutoClipper.csproj" ], "problemMatcher": "$msCompile" }, @@ -357,7 +388,7 @@ "watch", "run", "--project", - "${workspaceFolder}/tools/elastic/migration/TNO.Elastic.Migration.csproj", + "${workspaceFolder}/tools/elastic/migration/TNO.Elastic.Migration.csproj" ], "problemMatcher": "$msCompile" }, @@ -393,7 +424,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/notification/TNO.Services.Notification.csproj", + "${workspaceFolder}/services/net/notification/TNO.Services.Notification.csproj" ], "problemMatcher": "$msCompile" }, @@ -429,7 +460,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/reporting/TNO.Services.Reporting.csproj", + "${workspaceFolder}/services/net/reporting/TNO.Services.Reporting.csproj" ], "problemMatcher": "$msCompile" }, @@ -465,7 +496,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/scheduler/TNO.Services.Scheduler.csproj", + "${workspaceFolder}/services/net/scheduler/TNO.Services.Scheduler.csproj" ], "problemMatcher": "$msCompile" }, @@ -501,7 +532,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/folder-collection/TNO.Services.FolderCollection.csproj", + "${workspaceFolder}/services/net/folder-collection/TNO.Services.FolderCollection.csproj" ], "problemMatcher": "$msCompile" }, @@ -537,7 +568,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/ffmpeg/TNO.Services.FFmpeg.csproj", + "${workspaceFolder}/services/net/ffmpeg/TNO.Services.FFmpeg.csproj" ], "problemMatcher": "$msCompile" }, @@ -573,7 +604,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/extract-quotes/TNO.Services.ExtractQuotes.csproj", + "${workspaceFolder}/services/net/extract-quotes/TNO.Services.ExtractQuotes.csproj" ], "problemMatcher": "$msCompile" }, @@ -609,7 +640,7 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/event-handler/TNO.Services.EventHandler.csproj", + "${workspaceFolder}/services/net/event-handler/TNO.Services.EventHandler.csproj" ], "problemMatcher": "$msCompile" }, @@ -645,11 +676,10 @@ "watch", "run", "--project", - "${workspaceFolder}/services/net/ches-retry/TNO.Services.ChesRetry.csproj", + "${workspaceFolder}/services/net/ches-retry/TNO.Services.ChesRetry.csproj" ], "problemMatcher": "$msCompile" - } - , + }, { "label": "build-elastic-indexer", "command": "dotnet", "type": "process", @@ -682,7 +712,7 @@ "watch", "run", "--project", - "${workspaceFolder}/tools/indexer/TNO.Tools.ElasticIndexer.csproj", + "${workspaceFolder}/tools/indexer/TNO.Tools.ElasticIndexer.csproj" ], "problemMatcher": "$msCompile" } diff --git a/TNO.sln b/TNO.sln index 0a696cf823..7fee31e681 100644 --- a/TNO.sln +++ b/TNO.sln @@ -71,8 +71,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.TemplateEngine", "libs\ EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.Scheduler", "services\net\scheduler\TNO.Services.Scheduler.csproj", "{A2DD9547-A4AA-4E07-9239-77D689F49C47}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.ContentMigration", "services\net\contentmigration\TNO.Services.ContentMigration.csproj", "{7D0917C1-DFE3-420E-9980-D81947AC405F}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.FolderCollection", "services\net\folder-collection\TNO.Services.FolderCollection.csproj", "{B559B641-F1F0-41D6-9938-A23EC06542A2}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.ExtractQuotes",
"services\net\extract-quotes\TNO.Services.ExtractQuotes.csproj", "{9BC92B16-7AF9-4B45-BA18-C6A98E2AD87E}" @@ -85,6 +83,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.EventHandler", EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.ChesRetry", "services\net\ches-retry\TNO.Services.ChesRetry.csproj", "{067EA7C3-A816-406B-B36A-09FC05A427A1}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TNO.Services.AutoClipper", "services\net\auto-clipper\TNO.Services.AutoClipper.csproj", "{7B8BF924-36BA-422E-85FD-1C590B092F7B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -198,10 +198,6 @@ Global {A2DD9547-A4AA-4E07-9239-77D689F49C47}.Debug|Any CPU.Build.0 = Debug|Any CPU {A2DD9547-A4AA-4E07-9239-77D689F49C47}.Release|Any CPU.ActiveCfg = Release|Any CPU {A2DD9547-A4AA-4E07-9239-77D689F49C47}.Release|Any CPU.Build.0 = Release|Any CPU - {7D0917C1-DFE3-420E-9980-D81947AC405F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {7D0917C1-DFE3-420E-9980-D81947AC405F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7D0917C1-DFE3-420E-9980-D81947AC405F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7D0917C1-DFE3-420E-9980-D81947AC405F}.Release|Any CPU.Build.0 = Release|Any CPU {B559B641-F1F0-41D6-9938-A23EC06542A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B559B641-F1F0-41D6-9938-A23EC06542A2}.Debug|Any CPU.Build.0 = Debug|Any CPU {B559B641-F1F0-41D6-9938-A23EC06542A2}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -226,6 +222,10 @@ Global {067EA7C3-A816-406B-B36A-09FC05A427A1}.Debug|Any CPU.Build.0 = Debug|Any CPU {067EA7C3-A816-406B-B36A-09FC05A427A1}.Release|Any CPU.ActiveCfg = Release|Any CPU {067EA7C3-A816-406B-B36A-09FC05A427A1}.Release|Any CPU.Build.0 = Release|Any CPU + {7B8BF924-36BA-422E-85FD-1C590B092F7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7B8BF924-36BA-422E-85FD-1C590B092F7B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7B8BF924-36BA-422E-85FD-1C590B092F7B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7B8BF924-36BA-422E-85FD-1C590B092F7B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {16EA028B-B4C8-416D-BE54-D73D75483668} = {F627B24A-217D-4BF1-BC77-E1A92DBCD07F} @@ -258,12 +258,12 @@ Global {65185F9A-73C0-4C59-8DC4-892616963E43} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {CE993EF7-A38F-4563-837B-1194375D459B} = {890D13F9-A1ED-4B00-8E69-A1AB620F31A9} {A2DD9547-A4AA-4E07-9239-77D689F49C47} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} - {7D0917C1-DFE3-420E-9980-D81947AC405F} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {B559B641-F1F0-41D6-9938-A23EC06542A2} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {9BC92B16-7AF9-4B45-BA18-C6A98E2AD87E} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {E7444ADF-0137-439B-8E20-917CF2FAFA45} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {2D455400-0E86-476E-8C42-532D32C10107} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {6F1F9B85-B155-4A5A-BB36-10F734F96A12} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} {067EA7C3-A816-406B-B36A-09FC05A427A1} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} + {7B8BF924-36BA-422E-85FD-1C590B092F7B} = {448D6DE6-6887-48EC-A202-18C7EB428ACD} EndGlobalSection EndGlobal diff --git a/api/net/Areas/Editor/Controllers/WorkOrderController.cs b/api/net/Areas/Editor/Controllers/WorkOrderController.cs index 4cb1705989..d54f954c21 100644 --- a/api/net/Areas/Editor/Controllers/WorkOrderController.cs +++ b/api/net/Areas/Editor/Controllers/WorkOrderController.cs @@ -147,6 +147,29 @@ public async Task 
RequestTranscriptionAsync(long contentId) return new JsonResult(new WorkOrderMessageModel(workOrder, _serializerOptions)); } + /// + /// Request an auto clip for the content for the specified 'contentId'. + /// Publish message to kafka to request an auto clip. + /// + /// + /// + [HttpPost("auto-clip/{contentId}")] + [Produces(MediaTypeNames.Application.Json)] + [ProducesResponseType(typeof(WorkOrderMessageModel), (int)HttpStatusCode.OK)] + [ProducesResponseType(typeof(ErrorResponseModel), (int)HttpStatusCode.BadRequest)] + [SwaggerOperation(Tags = new[] { "WorkOrder" })] + public async Task RequestAutoClipAsync(long contentId) + { + var workOrder = await _workOrderHelper.RequestAutoClipAsync(contentId, true); + if (workOrder.Status != WorkOrderStatus.Submitted) + return new JsonResult(new WorkOrderMessageModel(workOrder, _serializerOptions)) + { + StatusCode = (int)HttpStatusCode.AlreadyReported + }; + + return new JsonResult(new WorkOrderMessageModel(workOrder, _serializerOptions)); + } + /// /// Request a Natural Language Processing for the content for the specified 'contentId'. /// Publish message to kafka to request a NLP. diff --git a/api/net/Areas/Helpers/IWorkOrderHelper.cs b/api/net/Areas/Helpers/IWorkOrderHelper.cs index 62251de1d3..f03ee532fa 100644 --- a/api/net/Areas/Helpers/IWorkOrderHelper.cs +++ b/api/net/Areas/Helpers/IWorkOrderHelper.cs @@ -34,6 +34,18 @@ public interface IWorkOrderHelper /// Task RequestTranscriptionAsync(long contentId, bool force = false); + /// + /// Request an auto clip for the specified 'contentId'. + /// Only allow one active auto clip request. + /// + /// + /// Whether to force a request regardless of the prior request's state + /// + /// + /// + /// + Task RequestAutoClipAsync(long contentId, bool force = false); + /// /// Request a transcript for the specified 'contentId'. /// Only allow one active transcript request. @@ -47,6 +59,19 @@ public interface IWorkOrderHelper /// Task RequestTranscriptionAsync(long contentId, Entities.User requestor, bool force = false); + /// + /// Request an auto clip for the specified 'contentId'. + /// Only allow one active auto clip request. + /// + /// + /// + /// Whether to force a request regardless of the prior request's state + /// + /// + /// + /// + Task RequestAutoClipAsync(long contentId, Entities.User requestor, bool force = false); + /// /// Request a natural language processing for the specified 'contentId'. /// Only allow one active nlp request. diff --git a/api/net/Areas/Helpers/WorkOrderHelper.cs b/api/net/Areas/Helpers/WorkOrderHelper.cs index 5b721663da..92a56a54d4 100644 --- a/api/net/Areas/Helpers/WorkOrderHelper.cs +++ b/api/net/Areas/Helpers/WorkOrderHelper.cs @@ -136,6 +136,24 @@ public bool ShouldAutoTranscribe(long contentId) return await RequestTranscriptionAsync(contentId, user, force); } + /// + /// Request an auto clip for the specified 'contentId'. + /// Only allow one active auto clip request. + /// + /// + /// Whether to force a request regardless of the prior request's state + /// + /// + /// + /// + public async Task RequestAutoClipAsync(long contentId, bool force = false) + { + string username = _principal.GetUsername() ?? throw new NotAuthorizedException("Username is missing"); + var user = _userService.FindByUsername(username) ?? throw new NotAuthorizedException("User is missing"); + + return await RequestAutoClipAsync(contentId, user, force); + } + /// /// Determine if the content has an existing transcript. /// @@ -195,6 +213,51 @@ public bool HasExistingTranscript(long contentId) return workOrders.OrderByDescending(w => w.CreatedOn).First(); } + /// + /// Request an auto clip for the specified 'contentId'. + /// Only allow one active auto clip request. + /// + /// + /// + /// Whether to force a request regardless of the prior request's state + /// + /// + /// + /// + /// + public async Task RequestAutoClipAsync(long contentId, Entities.User requestor, bool force = false) + { + if (this.Content == null || this.Content.Id != contentId) + this.Content = _contentService.FindById(contentId) ?? throw new NoContentException("Content does not exist"); + if (String.IsNullOrWhiteSpace(_kafkaOptions.AutoClipTopic)) throw new ConfigurationException("Kafka auto clip topic not configured."); + + if (this.Content.IsApproved && force == false) throw new InvalidOperationException("Content is already approved"); + // Only allow one work order auto clip request at a time. + // TODO: Handle blocked work orders stuck in progress. + var workOrders = _workOrderService.FindByContentId(contentId); + + // Add the user to the content notification. + _notificationService.SubscriberUserToContent(requestor.Id, contentId); + + if (force || !workOrders.Any(o => o.WorkType == Entities.WorkOrderType.AutoClip || !WorkLimiterStatus.Contains(o.Status))) + { + var headlineString = $"{{ \"headline\": \"{this.Content.Headline.Replace("\n", "")}\" }}"; + var configuration = JsonDocument.Parse(headlineString); + var workOrder = _workOrderService.AddAndSave( + new Entities.WorkOrder( + Entities.WorkOrderType.AutoClip, + requestor, + "", + this.Content, + configuration + )); + + await _kafkaMessenger.SendMessageAsync(_kafkaOptions.AutoClipTopic, new TNO.Kafka.Models.ClipRequestModel(workOrder)); + return workOrder; + } + return workOrders.OrderByDescending(w => w.CreatedOn).First(); + } + /// /// Request a natural language processing for the specified 'contentId'. /// Only allow one active nlp request.
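A note on the RequestAutoClipAsync hunk above: the work order configuration is assembled by string interpolation, so a headline containing a double quote or backslash would produce a payload that JsonDocument.Parse rejects. A minimal escape-safe sketch, assuming the same WorkOrderHelper context and producing the same { "headline": ... } document:

using System.Text.Json;

// Serializing through JsonSerializer escapes quotes, backslashes, and control
// characters instead of splicing the headline into a JSON string by hand.
var payload = JsonSerializer.Serialize(new { headline = this.Content.Headline.Replace("\n", "") });
var configuration = JsonDocument.Parse(payload);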
diff --git a/api/net/Config/KafkaOptions.cs b/api/net/Config/KafkaOptions.cs index 877dde1320..fb7e014a79 100644 --- a/api/net/Config/KafkaOptions.cs +++ b/api/net/Config/KafkaOptions.cs @@ -16,6 +16,11 @@ public class KafkaOptions /// public string TranscriptionTopic { get; set; } = ""; + /// + /// get/set - The Kafka topic name to request auto clips. + /// + public string AutoClipTopic { get; set; } = ""; + /// /// get/set - The Kafka topic name to request NLP.
/// diff --git a/api/net/TNO.API.csproj b/api/net/TNO.API.csproj index 541e065147..e3c4205761 100644 --- a/api/net/TNO.API.csproj +++ b/api/net/TNO.API.csproj @@ -16,12 +16,17 @@ - + + + + + + diff --git a/api/net/appsettings.json b/api/net/appsettings.json index fa2350c1b7..0078bd7fb5 100644 --- a/api/net/appsettings.json +++ b/api/net/appsettings.json @@ -62,6 +62,7 @@ "Kafka": { "IndexingTopic": "index", "TranscriptionTopic": "transcribe", + "AutoClipTopic": "request-clips", "NLPTopic": "nlp", "FileRequestTopic": "file-request", "NotificationTopic": "notify", diff --git a/app/editor/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip b/app/editor/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip similarity index 92% rename from app/editor/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip rename to app/editor/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip index 3204dd13bb..e44eb0e776 100644 Binary files a/app/editor/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip and b/app/editor/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip differ diff --git a/app/editor/package.json b/app/editor/package.json index c656da9b28..0b66224c91 100644 --- a/app/editor/package.json +++ b/app/editor/package.json @@ -60,7 +60,7 @@ "redux-logger": "3.0.6", "styled-components": "6.1.11", "stylis": "4.3.2", - "tno-core": "1.0.29" + "tno-core": "1.0.30" }, "devDependencies": { "@simbathesailor/use-what-changed": "2.0.0", diff --git a/app/editor/src/features/content/form/ContentForm.tsx b/app/editor/src/features/content/form/ContentForm.tsx index af23e685fc..d44f3bf448 100644 --- a/app/editor/src/features/content/form/ContentForm.tsx +++ b/app/editor/src/features/content/form/ContentForm.tsx @@ -86,6 +86,7 @@ const ContentForm: React.FC = ({ handlePublish, handleUnpublish, handleTranscribe, + handleAutoClip, handleNLP, goToNext, file, @@ -105,6 +106,7 @@ const ContentForm: React.FC = ({ const { id } = useParams(); const { isShowing: showDeleteModal, toggle: toggleDelete } = useModal(); const { isShowing: showTranscribeModal, toggle: toggleTranscribe } = useModal(); + const { isShowing: showAutoClipModal, toggle: toggleAutoClip } = useModal(); const { isShowing: showNLPModal, toggle: toggleNLP } = useModal(); const refForm = React.useRef(null); @@ -842,7 +844,10 @@ const ContentForm: React.FC = ({ Transcript @@ -1073,7 +1078,7 @@ const ContentForm: React.FC = ({ onClick={() => isWorkOrderStatus( form.workOrders, - WorkOrderTypeName.Transcription, + [WorkOrderTypeName.Transcription, WorkOrderTypeName.AutoClip], [WorkOrderStatusName.Completed], ) ? toggleTranscribe() @@ -1090,6 +1095,27 @@ const ContentForm: React.FC = ({ > Transcribe + = ({ } }} /> + { + try { + await handleAutoClip(props.values, props); + } finally { + toggleAutoClip(); + } + }} + /> ) => { + try { + // TODO: Only save when required. + // Save before submitting request. + const content = await handleSubmit(values, formikHelpers); + const response = await autoClip(toModel(values)); + setForm({ ...content, workOrders: [response.data, ...form.workOrders] }); + + if (response.status === 200) toast.success('An auto clip has been requested'); + else if (response.status === 208) { + if (response.data.status === WorkOrderStatusName.Completed) + toast.warn('Content has already been auto clipped'); + else toast.warn(`An active request for auto clipping already exists`); + } + } catch { + // Ignore this failure it is handled by our global ajax requests. 
+ } + }, + [form.workOrders, handleSubmit, autoClip], + ); + const handleNLP = React.useCallback( async (values: IContentForm, formikHelpers: FormikHelpers) => { try { @@ -434,6 +456,7 @@ export const useContentForm = ({ handlePublish, handleUnpublish, handleTranscribe, + handleAutoClip, handleNLP, handleFFmpeg, goToNext, diff --git a/app/editor/src/features/content/utils/findWorkOrder.ts b/app/editor/src/features/content/utils/findWorkOrder.ts index d12e5cbccb..26fa5bfab1 100644 --- a/app/editor/src/features/content/utils/findWorkOrder.ts +++ b/app/editor/src/features/content/utils/findWorkOrder.ts @@ -8,7 +8,10 @@ import { IWorkOrderModel, WorkOrderTypeName } from 'tno-core'; */ export const findWorkOrder = ( workOrders: IWorkOrderModel[] | undefined, - type: WorkOrderTypeName, + type: WorkOrderTypeName | WorkOrderTypeName[], ) => { + if (Array.isArray(type)) { + return workOrders?.find((i) => type.includes(i.workType)); + } return workOrders?.find((i) => i.workType === type); }; diff --git a/app/editor/src/features/content/utils/isWorkOrderStatus.ts b/app/editor/src/features/content/utils/isWorkOrderStatus.ts index 0b568cda2a..68b25afd8f 100644 --- a/app/editor/src/features/content/utils/isWorkOrderStatus.ts +++ b/app/editor/src/features/content/utils/isWorkOrderStatus.ts @@ -9,8 +9,10 @@ import { IWorkOrderModel, WorkOrderStatusName, WorkOrderTypeName } from 'tno-cor */ export const isWorkOrderStatus = ( workOrders: IWorkOrderModel[] | undefined, - type: WorkOrderTypeName, + type: WorkOrderTypeName | WorkOrderTypeName[], status: WorkOrderStatusName[], ) => { + if (Array.isArray(type)) + return workOrders?.some((i) => type.includes(i.workType) && status.includes(i.status)) ?? false; return workOrders?.some((i) => i.workType === type && status.includes(i.status)) ?? 
false; }; diff --git a/app/editor/src/store/hooks/editor/useWorkOrders.ts b/app/editor/src/store/hooks/editor/useWorkOrders.ts index 63e5f8777a..f15dc4bb72 100644 --- a/app/editor/src/store/hooks/editor/useWorkOrders.ts +++ b/app/editor/src/store/hooks/editor/useWorkOrders.ts @@ -17,6 +17,7 @@ interface IWorkOrderController { findWorkOrders: (filter: IWorkOrderFilter) => Promise>>; updateWorkOrder: (workOrder: IWorkOrderModel) => Promise>; transcribe: (content: IContentModel) => Promise>; + autoClip: (content: IContentModel) => Promise>; nlp: (content: IContentModel) => Promise>; requestFile: (locationId: number, path: string) => Promise>; ffmpeg: (content: IContentModel) => Promise>; @@ -44,6 +45,9 @@ export const useWorkOrders = (): [IWorkOrderState, IWorkOrderController] => { transcribe: async (content: IContentModel) => { return await dispatch('transcribe-content', () => api.transcribe(content)); }, + autoClip: async (content: IContentModel) => { + return await dispatch('auto-clip-content', () => api.autoClip(content)); + }, nlp: async (content: IContentModel) => { return await dispatch('nlp-content', () => api.nlp(content)); }, diff --git a/app/editor/tno-core-1.0.28.tgz b/app/editor/tno-core-1.0.28.tgz deleted file mode 100644 index 21c9866e5b..0000000000 Binary files a/app/editor/tno-core-1.0.28.tgz and /dev/null differ diff --git a/app/editor/yarn.lock b/app/editor/yarn.lock index 401b8c6091..0e57b65dab 100644 --- a/app/editor/yarn.lock +++ b/app/editor/yarn.lock @@ -12209,7 +12209,7 @@ __metadata: sass-extract-loader: 1.1.0 styled-components: 6.1.11 stylis: 4.3.2 - tno-core: 1.0.29 + tno-core: 1.0.30 typescript: 4.9.5 vitest: 3.0.7 languageName: unknown @@ -16674,9 +16674,9 @@ __metadata: languageName: node linkType: hard -"tno-core@npm:1.0.29": - version: 1.0.29 - resolution: "tno-core@npm:1.0.29" +"tno-core@npm:1.0.30": + version: 1.0.30 + resolution: "tno-core@npm:1.0.30" dependencies: "@elastic/elasticsearch": ^8.13.1 "@fortawesome/free-solid-svg-icons": ^6.4.2 @@ -16709,7 +16709,7 @@ __metadata: styled-components: ^6.1.11 stylis: ^4.3.2 yup: ^1.1.1 - checksum: 277dacbb5080703241317cb524e6faa7f7dd6e9af7b8bfb7e247d72f185c344cf2a486d6349960a2cf239c06f41c1124242d3e1c962ca37cee9e57de70e2ec62 + checksum: 33f6b3545abd77bf5f34848cf0a25d240e1d8ddfd4e542fa18be357977e5d584f60668eb01c2d5e3f10052eaec83366ee9f07b2c245a9176bf7b47a1bc9952f3 languageName: node linkType: hard diff --git a/app/subscriber/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip b/app/subscriber/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip similarity index 92% rename from app/subscriber/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip rename to app/subscriber/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip index 3204dd13bb..e44eb0e776 100644 Binary files a/app/subscriber/.yarn/cache/tno-core-npm-1.0.29-b0ccc3fe82-277dacbb50.zip and b/app/subscriber/.yarn/cache/tno-core-npm-1.0.30-168708be52-33f6b3545a.zip differ diff --git a/app/subscriber/package.json b/app/subscriber/package.json index 52ba193330..12d77683d4 100644 --- a/app/subscriber/package.json +++ b/app/subscriber/package.json @@ -48,7 +48,7 @@ "sheetjs": "file:packages/xlsx-0.20.1.tgz", "styled-components": "6.1.11", "stylis": "4.3.2", - "tno-core": "1.0.29" + "tno-core": "1.0.30" }, "devDependencies": { "@testing-library/jest-dom": "6.6.3", diff --git a/app/subscriber/yarn.lock b/app/subscriber/yarn.lock index 52bec149b0..3837d403dc 100644 --- a/app/subscriber/yarn.lock +++ b/app/subscriber/yarn.lock @@ -12042,7 +12042,7 
@@ __metadata: sheetjs: "file:packages/xlsx-0.20.1.tgz" styled-components: 6.1.11 stylis: 4.3.2 - tno-core: 1.0.29 + tno-core: 1.0.30 typescript: 4.9.5 vitest: 3.0.7 languageName: unknown @@ -16397,9 +16397,9 @@ __metadata: languageName: node linkType: hard -"tno-core@npm:1.0.29": - version: 1.0.29 - resolution: "tno-core@npm:1.0.29" +"tno-core@npm:1.0.30": + version: 1.0.30 + resolution: "tno-core@npm:1.0.30" dependencies: "@elastic/elasticsearch": ^8.13.1 "@fortawesome/free-solid-svg-icons": ^6.4.2 @@ -16432,7 +16432,7 @@ __metadata: styled-components: ^6.1.11 stylis: ^4.3.2 yup: ^1.1.1 - checksum: 277dacbb5080703241317cb524e6faa7f7dd6e9af7b8bfb7e247d72f185c344cf2a486d6349960a2cf239c06f41c1124242d3e1c962ca37cee9e57de70e2ec62 + checksum: 33f6b3545abd77bf5f34848cf0a25d240e1d8ddfd4e542fa18be357977e5d584f60668eb01c2d5e3f10052eaec83366ee9f07b2c245a9176bf7b47a1bc9952f3 languageName: node linkType: hard diff --git a/db/kafka/migrations/U1.0.0.sh b/db/kafka/migrations/U1.0.0.sh index cf5bef2831..93b429b661 100755 --- a/db/kafka/migrations/U1.0.0.sh +++ b/db/kafka/migrations/U1.0.0.sh @@ -5,5 +5,6 @@ docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic notify --b docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic index --bootstrap-server $bootstrap" docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic reporting --bootstrap-server $bootstrap" docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic transcribe --bootstrap-server $bootstrap" +docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic request-clips --bootstrap-server $bootstrap" docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic ffmpeg --bootstrap-server $bootstrap" docker exec -i tno-broker bash -c "/bin/kafka-topics --delete --topic event-schedule --bootstrap-server $bootstrap" diff --git a/db/kafka/migrations/V1.0.0.sh b/db/kafka/migrations/V1.0.0.sh index 83ac9d4384..77ac9851b5 100755 --- a/db/kafka/migrations/V1.0.0.sh +++ b/db/kafka/migrations/V1.0.0.sh @@ -1,19 +1,21 @@ #!/bin/bash # Topics for media capture -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic hub --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic notify --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic index --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic reporting --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic transcribe --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic ffmpeg --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" -docker exec -i tno-broker bash -c "/bin/kafka-topics --create --topic event-schedule --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic hub --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists 
--topic notify --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic index --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic reporting --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic transcribe --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic request-clips --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic ffmpeg --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" +docker exec -i tno-broker bash -c "/bin/kafka-topics --create --if-not-exists --topic event-schedule --bootstrap-server $bootstrap --partitions $partitions --replication-factor $replication" ## Manually add topics -# /bin/kafka-topics --create --topic hub --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic notify --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic index --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic reporting --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic transcribe --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic ffmpeg --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 -# /bin/kafka-topics --create --topic event-schedule --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic hub --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic notify --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic index --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic reporting --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic transcribe --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic request-clips --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic ffmpeg --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 +# /bin/kafka-topics --create --if-not-exists --topic event-schedule --bootstrap-server kafka-headless:29092 --partitions 6 --replication-factor 3 diff --git a/libs/net/entities/WorkOrderType.cs b/libs/net/entities/WorkOrderType.cs index fa16cd685b..89d292005d 100644 --- a/libs/net/entities/WorkOrderType.cs +++ b/libs/net/entities/WorkOrderType.cs @@ -24,4 +24,9 @@ public enum WorkOrderType /// A request to process file with FFmpeg. 
/// FFmpeg = 3, + + /// + /// A request to generate clips and transcripts via the auto clipper pipeline. + /// + AutoClip = 4, } diff --git a/libs/net/kafka/Interfaces/IKafkaMessenger.cs b/libs/net/kafka/Interfaces/IKafkaMessenger.cs index 07f18a0219..58769e22de 100644 --- a/libs/net/kafka/Interfaces/IKafkaMessenger.cs +++ b/libs/net/kafka/Interfaces/IKafkaMessenger.cs @@ -36,6 +36,14 @@ public interface IKafkaMessenger /// public Task?> SendMessageAsync(string topic, TranscriptRequestModel request); + /// + /// Send a message to Kafka. + /// + /// + /// + /// + public Task?> SendMessageAsync(string topic, ClipRequestModel request); + /// /// Send a message to Kafka. /// diff --git a/libs/net/kafka/KafkaMessenger.cs b/libs/net/kafka/KafkaMessenger.cs index 5482a39ba9..65f711929c 100644 --- a/libs/net/kafka/KafkaMessenger.cs +++ b/libs/net/kafka/KafkaMessenger.cs @@ -92,6 +92,19 @@ public KafkaMessenger(IOptions serializerOptions, IOption return await SendMessageAsync(topic, $"{request.ContentId}", request); } + /// + /// Send a message to Kafka. + /// + /// + /// + /// + public async Task?> SendMessageAsync(string topic, ClipRequestModel request) + { + if (request == null) throw new ArgumentNullException(nameof(request)); + + return await SendMessageAsync(topic, $"{request.ContentId}", request); + } + /// /// Send a message to Kafka. ///
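Both SendMessageAsync overloads above key the message with request.ContentId, which pins every work order for a given content item to the same partition so they are consumed in order. A minimal consumer-side sketch for the request-clips topic (illustrative only: the broker address and the plain Confluent.Kafka loop are assumptions, not the service's actual listener wiring):

using Confluent.Kafka;
using System.Text.Json;
using TNO.Kafka.Models;

var config = new ConsumerConfig
{
    BootstrapServers = "localhost:9092", // assumption: local broker
    GroupId = "AutoClipper",
};
using var consumer = new ConsumerBuilder<string, string>(config).Build();
consumer.Subscribe("request-clips");
while (true)
{
    var result = consumer.Consume();
    // The message key is the ContentId, so requests for one content item stay ordered.
    var request = JsonSerializer.Deserialize<ClipRequestModel>(result.Message.Value);
    if (request is not null)
        Console.WriteLine($"Auto clip requested for content {request.ContentId} ({request.Language})");
}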
diff --git a/libs/net/kafka/Models/ClipRequestModel.cs b/libs/net/kafka/Models/ClipRequestModel.cs new file mode 100644 index 0000000000..1e7da45cad --- /dev/null +++ b/libs/net/kafka/Models/ClipRequestModel.cs @@ -0,0 +1,77 @@ +using System.Text.Json; +using TNO.API.Areas.Services.Models.Content; +using TNO.Entities; + +namespace TNO.Kafka.Models; + +/// +/// ClipRequestModel class, provides a model for requesting automatic clip generation via Azure Video Analyzer. +/// +public class ClipRequestModel : WorkOrderModel +{ + #region Properties + /// + /// get/set - The content Id to process. + /// + public long ContentId { get; set; } + + /// + /// get/set - Preferred language for the transcript generation. + /// + public string Language { get; set; } = "en-US"; + #endregion + + #region Constructors + /// + /// Creates a new instance of a ClipRequestModel object. + /// + public ClipRequestModel() : base(WorkOrderType.AutoClip) { } + + /// + /// Creates a new instance of a ClipRequestModel object, initializes with specified parameters. + /// + /// + /// + /// + /// + /// + public ClipRequestModel(long workOrderId, long contentId, int? requestorId, string requestor, string language = "en-US") + : base(workOrderId, WorkOrderType.AutoClip, requestorId, requestor, DateTime.UtcNow) + { + this.ContentId = contentId; + if (!string.IsNullOrWhiteSpace(language)) this.Language = language; + } + + /// + /// Creates a new instance of a ClipRequestModel object for the specified content model. + /// + /// + /// + /// + /// + public ClipRequestModel(ContentModel content, int? requestorId, string requestor, string language = "en-US") + : this(0, content.Id, requestorId, requestor, language) + { + } + + /// + /// Creates a new instance of a ClipRequestModel object, initializes with specified parameters. + /// + /// + public ClipRequestModel(WorkOrder workOrder) : base(workOrder) + { + if (workOrder.ContentId.HasValue) + this.ContentId = workOrder.ContentId.Value; + else if (workOrder.Configuration.RootElement.TryGetProperty("contentId", out JsonElement element) && element.TryGetInt64(out long contentId)) + this.ContentId = contentId; + else throw new ArgumentException("Work order must be for an auto clipper request and contain 'contentId' property."); + + if (workOrder.Configuration.RootElement.TryGetProperty("language", out JsonElement languageElement) && languageElement.ValueKind == JsonValueKind.String) + { + var language = languageElement.GetString(); + if (!string.IsNullOrWhiteSpace(language)) this.Language = language!; + } + } + #endregion +} + diff --git a/libs/net/kafka/Models/TrascriptRequestModel.cs b/libs/net/kafka/Models/TranscriptRequestModel.cs similarity index 100% rename from libs/net/kafka/Models/TrascriptRequestModel.cs rename to libs/net/kafka/Models/TranscriptRequestModel.cs diff --git a/libs/net/models/Areas/Services/Models/Content/ContentTagModel.cs b/libs/net/models/Areas/Services/Models/Content/ContentTagModel.cs index da99b3e419..099e057459 100644 --- a/libs/net/models/Areas/Services/Models/Content/ContentTagModel.cs +++ b/libs/net/models/Areas/Services/Models/Content/ContentTagModel.cs @@ -47,6 +47,20 @@ public ContentTagModel(string code) this.Code = code; } + /// + /// Creates a new instance of a ContentTagModel, initializes with specified parameters. + /// + /// + /// + /// + public ContentTagModel(int tagId, string code, string name) + { + this.Id = tagId; + this.Code = code; + this.Name = name; + } + /// /// Creates a new instance of a ContentTagModel, initializes with specified parameter. /// diff --git a/libs/net/services/Helpers/ApiService.cs b/libs/net/services/Helpers/ApiService.cs index ca94b9ce2a..061c0154c9 100644 --- a/libs/net/services/Helpers/ApiService.cs +++ b/libs/net/services/Helpers/ApiService.cs @@ -1,7 +1,6 @@ using System.Net; using System.Net.Http.Headers; using System.Net.Http.Json; -using System.Text; using System.Text.Json; using FTTLib; using Microsoft.AspNetCore.WebUtilities; @@ -250,6 +249,14 @@ public async Task GetLookupsResponseWithEtagAsync(string etag) } #endregion + #region Tags + public async Task GetTagsResponseWithEtagAsync(string etag) + { + var url = this.Options.ApiUrl.Append($"editor/tags"); + return await RetryRequestAsync(async () => await this.OpenClient.GetAsync(url, etag)); + } + #endregion + #region Data Location Methods /// /// Make an HTTP request to the api to get the data location for the specified 'id'. diff --git a/libs/net/services/Helpers/IApiService.cs b/libs/net/services/Helpers/IApiService.cs index 96240deb1f..4be65e8c72 100644 --- a/libs/net/services/Helpers/IApiService.cs +++ b/libs/net/services/Helpers/IApiService.cs @@ -75,6 +75,10 @@ public interface IApiService public Task GetLookupsResponseWithEtagAsync(string etag); #endregion + #region Tags + Task GetTagsResponseWithEtagAsync(string etag); + #endregion + #region Sources /// /// Make a request to the API to fetch all sources.
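GetTagsResponseWithEtagAsync follows the same conditional-request pattern as the lookup helpers above: the caller holds on to the last ETag, treats 304 Not Modified as a cache hit, and only re-reads the body on 200. A minimal sketch of the calling side (assumes the method returns an HttpResponseMessage; the TagCache wrapper and its fields are illustrative, not part of the API):

using System.Net;
using System.Net.Http.Json;
using TNO.API.Areas.Services.Models.Content; // ContentTagModel

public class TagCache
{
    private string _etag = "";
    private ContentTagModel[] _tags = Array.Empty<ContentTagModel>();

    public async Task<ContentTagModel[]> GetTagsAsync(IApiService api)
    {
        var response = await api.GetTagsResponseWithEtagAsync(_etag);
        if (response.StatusCode == HttpStatusCode.NotModified) return _tags; // cache still current
        response.EnsureSuccessStatusCode();
        _etag = response.Headers.ETag?.Tag ?? "";
        _tags = await response.Content.ReadFromJsonAsync<ContentTagModel[]>() ?? Array.Empty<ContentTagModel>();
        return _tags;
    }
}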
diff --git a/libs/npm/core/package.json b/libs/npm/core/package.json index 9d299b795c..48bb46a0bf 100644 --- a/libs/npm/core/package.json +++ b/libs/npm/core/package.json @@ -1,7 +1,7 @@ { "name": "tno-core", "description": "TNO shared library", - "version": "1.0.29", + "version": "1.0.30", "homepage": "https://github.com/bcgov/tno", "license": "Apache-2.0", "files": [ diff --git a/libs/npm/core/src/hooks/api/constants/WorkOrderType.ts b/libs/npm/core/src/hooks/api/constants/WorkOrderType.ts index 22be3dae18..fb1bec46e1 100644 --- a/libs/npm/core/src/hooks/api/constants/WorkOrderType.ts +++ b/libs/npm/core/src/hooks/api/constants/WorkOrderType.ts @@ -18,4 +18,9 @@ export enum WorkOrderType { /// A request for content to be sent for FFmpeg actions. /// FFmpeg = 3, + + /// + /// A request for content to be sent for auto clipping and transcription. + /// + AutoClip = 4, } diff --git a/libs/npm/core/src/hooks/api/constants/WorkOrderTypeName.ts b/libs/npm/core/src/hooks/api/constants/WorkOrderTypeName.ts index b063e8f9f8..410dd4fe85 100644 --- a/libs/npm/core/src/hooks/api/constants/WorkOrderTypeName.ts +++ b/libs/npm/core/src/hooks/api/constants/WorkOrderTypeName.ts @@ -18,4 +18,9 @@ export enum WorkOrderTypeName { /// A request for content to be sent for FFmpeg actions. /// FFmpeg = 'FFmpeg', + + /// + /// A request for content to be sent for auto clipping and transcription. + /// + AutoClip = 'AutoClip', } diff --git a/libs/npm/core/src/hooks/api/editor/useApiEditorWorkOrders.ts b/libs/npm/core/src/hooks/api/editor/useApiEditorWorkOrders.ts index 919ab5ced6..285d1fed8b 100644 --- a/libs/npm/core/src/hooks/api/editor/useApiEditorWorkOrders.ts +++ b/libs/npm/core/src/hooks/api/editor/useApiEditorWorkOrders.ts @@ -36,6 +36,11 @@ export const useApiEditorWorkOrders = ( `/editor/work/orders/transcribe/${content.id}`, ); }, + autoClip: (content: IContentModel) => { + return api.post, any>( + `/editor/work/orders/auto-clip/${content.id}`, + ); + }, nlp: (content: IContentModel) => { return api.post, any>( `/editor/work/orders/nlp/${content.id}`, diff --git a/openshift/kustomize/services/auto-clipper/base/config-map.yaml b/openshift/kustomize/services/auto-clipper/base/config-map.yaml new file mode 100644 index 0000000000..da3bad36d1 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/base/config-map.yaml @@ -0,0 +1,25 @@ +--- +# Configuration settings +kind: ConfigMap +apiVersion: v1 +metadata: + name: auto-clipper-service + namespace: default + annotations: + description: Auto Clipper service configuration settings + created-by: jeremy.foster + labels: + name: auto-clipper-service + part-of: tno + version: 1.0.0 + component: auto-clipper-service + managed-by: kustomize +data: + KAFKA_CLIENT_ID: AutoClipper + MAX_FAIL_LIMIT: "5" + TOPICS: request-clips + VOLUME_PATH: /data + CHES_EMAIL_ENABLED: "true" + CHES_EMAIL_AUTHORIZED: "true" + LLM_API_URL: "https://mmi-ai-foundry-east-us-2.openai.azure.com/openai/v1/chat/completions" + LLM_MODEL_NAME: "gpt-5.1-chat" diff --git a/openshift/kustomize/services/auto-clipper/base/deploy.yaml b/openshift/kustomize/services/auto-clipper/base/deploy.yaml new file mode 100644 index 0000000000..10f68f2c73 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/base/deploy.yaml @@ -0,0 +1,272 @@ +--- +# How the app will be deployed to the pod.
+kind: Deployment +apiVersion: apps/v1 +metadata: + name: auto-clipper-service + namespace: default + annotations: + description: Defines how to deploy auto-clipper-service + created-by: jeremy.foster + image.openshift.io/triggers: '[{"from": {"kind": "ImageStreamTag", "name": "auto-clipper-service:dev", "namespace": "9b301c-tools"}, "fieldPath": "spec.template.spec.containers[?(@.name==\"auto-clipper-service\")].image"}]' + labels: + name: auto-clipper-service + part-of: tno + version: 1.0.0 + component: auto-clipper-service + managed-by: kustomize +spec: + replicas: 1 + selector: + matchLabels: + name: auto-clipper-service + part-of: tno + component: auto-clipper-service + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + template: + metadata: + name: auto-clipper-service + labels: + name: auto-clipper-service + part-of: tno + component: auto-clipper-service + spec: + volumes: + - name: api-storage + persistentVolumeClaim: + claimName: api-storage + containers: + - name: auto-clipper-service + image: image-registry.openshift-image-registry.svc:5000/9b301c-tools/auto-clipper-service:prod + imagePullPolicy: Always + ports: + - containerPort: 8080 + protocol: TCP + volumeMounts: + - name: api-storage + mountPath: /data + resources: + requests: + cpu: 20m + memory: 100Mi + limits: + cpu: 50m + memory: 2Gi + env: + # .NET Configuration + - name: ASPNETCORE_ENVIRONMENT + value: Production + - name: ASPNETCORE_URLS + value: http://+:8080 + + - name: Serilog__MinimumLevel__Override__TNO + value: Information + + # Common Service Configuration + - name: Service__ApiUrl + valueFrom: + configMapKeyRef: + name: services + key: API_HOST_URL + - name: Service__EmailTo + valueFrom: + configMapKeyRef: + name: services + key: EMAIL_FAILURE_TO + - name: Service__NoticeEmailTo + valueFrom: + configMapKeyRef: + name: services + key: EMAIL_NOTICE_TO + + # Authentication Configuration + - name: Auth__Keycloak__Authority + valueFrom: + configMapKeyRef: + name: services + key: KEYCLOAK_AUTHORITY + - name: Auth__Keycloak__Audience + valueFrom: + configMapKeyRef: + name: services + key: KEYCLOAK_AUDIENCE + - name: Auth__Keycloak__Secret + valueFrom: + secretKeyRef: + name: keycloak + key: KEYCLOAK_CLIENT_SECRET + + - name: Kafka__Admin__ClientId + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: KAFKA_CLIENT_ID + - name: Kafka__Admin__BootstrapServers + valueFrom: + configMapKeyRef: + name: services + key: KAFKA_BOOTSTRAP_SERVERS + + - name: Kafka__Consumer__GroupId + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: KAFKA_CLIENT_ID + - name: Kafka__Consumer__BootstrapServers + valueFrom: + configMapKeyRef: + name: services + key: KAFKA_BOOTSTRAP_SERVERS + + - name: Kafka__Producer__ClientId + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: KAFKA_CLIENT_ID + - name: Kafka__Producer__BootstrapServers + valueFrom: + configMapKeyRef: + name: services + key: KAFKA_BOOTSTRAP_SERVERS + + # Azure Speech Services Configuration + - name: Service__AzureSpeechRegion + valueFrom: + secretKeyRef: + name: azure-speech-services + key: AZURE_REGION + - name: Service__AzureSpeechKey + valueFrom: + secretKeyRef: + name: azure-speech-services + key: AZURE_SPEECH_SERVICES_KEY + - name: Service__AzureSpeechStorageConnectionString + valueFrom: + secretKeyRef: + name: azure-speech-services + key: STORAGE_CONNECTIONSTRING + - name: Service__AzureSpeechStorageContainer + valueFrom: + secretKeyRef: + name: azure-speech-services + key: 
STORAGE_CONTAINER + - name: Service__LlmApiKey + valueFrom: + secretKeyRef: + name: azure-openai + key: AZURE_OPENAI_KEY + + # Service Configuration + - name: Service__MaxFailLimit + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: MAX_FAIL_LIMIT + - name: Service__Topics + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: TOPICS + - name: Service__VolumePath + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: VOLUME_PATH + - name: Service__LlmApiUrl + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: LLM_API_URL + - name: Service__LlmDefaultModel + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: LLM_MODEL_NAME + + # S3 Configuration + - name: S3__AccessKey + valueFrom: + secretKeyRef: + name: s3-backup-credentials + key: S3_ACCESS_KEY + - name: S3__SecretKey + valueFrom: + secretKeyRef: + name: s3-backup-credentials + key: S3_SECRET_KEY + - name: S3__BucketName + valueFrom: + secretKeyRef: + name: s3-backup-credentials + key: S3_BUCKET_NAME + - name: S3__ServiceUrl + valueFrom: + secretKeyRef: + name: s3-backup-credentials + key: S3_SERVICE_URL + + # CHES Configuration + - name: CHES__From + valueFrom: + configMapKeyRef: + name: ches + key: CHES_FROM + - name: CHES__EmailEnabled + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: CHES_EMAIL_ENABLED + - name: CHES__EmailAuthorized + valueFrom: + configMapKeyRef: + name: auto-clipper-service + key: CHES_EMAIL_AUTHORIZED + + - name: CHES__AuthUrl + valueFrom: + configMapKeyRef: + name: ches + key: CHES_AUTH_URL + - name: CHES__HostUri + valueFrom: + configMapKeyRef: + name: ches + key: CHES_HOST_URI + - name: CHES__Username + valueFrom: + secretKeyRef: + name: ches + key: USERNAME + - name: CHES__Password + valueFrom: + secretKeyRef: + name: ches + key: PASSWORD + livenessProbe: + httpGet: + path: "/health" + port: 8080 + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 20 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: "/health" + port: 8080 + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 20 + successThreshold: 1 + failureThreshold: 3 + dnsPolicy: ClusterFirst + restartPolicy: Always + securityContext: {} + terminationGracePeriodSeconds: 30 diff --git a/openshift/kustomize/services/auto-clipper/base/kustomization.yaml b/openshift/kustomize/services/auto-clipper/base/kustomization.yaml new file mode 100644 index 0000000000..8075511b29 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/base/kustomization.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - config-map.yaml + - deploy.yaml + - service.yaml + +generatorOptions: + disableNameSuffixHash: true diff --git a/openshift/kustomize/services/auto-clipper/base/secret.yaml b/openshift/kustomize/services/auto-clipper/base/secret.yaml new file mode 100644 index 0000000000..87256f77b2 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/base/secret.yaml @@ -0,0 +1,36 @@ +kind: Secret +apiVersion: v1 +metadata: + name: azure-speech-services + namespace: default + annotations: + description: Azure Speech Services secrets + created-by: jeremy.foster + labels: + name: azure-speech-services + part-of: tno + version: 1.0.0 + component: transcription + managed-by: kustomize +type: Opaque +stringData: + AZURE_REGION: ${AZURE_REGION} + AZURE_SPEECH_SERVICES_KEY: ${AZURE_SPEECH_SERVICES_KEY} +--- +kind: Secret +apiVersion: v1 +metadata: + 
name: azure-openai + namespace: default + annotations: + description: Azure Cognitive Services secrets + created-by: jeremy.foster + labels: + name: azure-openai + part-of: tno + version: 1.0.0 + component: ai + managed-by: kustomize +type: Opaque +stringData: + AZURE_OPENAI_KEY: ${AZURE_OPENAI_KEY} diff --git a/openshift/kustomize/services/auto-clipper/base/service.yaml b/openshift/kustomize/services/auto-clipper/base/service.yaml new file mode 100644 index 0000000000..229626c630 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/base/service.yaml @@ -0,0 +1,27 @@ +--- +# Open up ports to communicate with the app. +kind: Service +apiVersion: v1 +metadata: + name: auto-clipper-service + namespace: default + annotations: + description: Exposes and load balances the application pods. + created-by: jeremy.foster + labels: + name: auto-clipper-service + part-of: tno + version: 1.0.0 + component: auto-clipper-service + managed-by: kustomize +spec: + ports: + - name: 8080-tcp + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + part-of: tno + component: auto-clipper-service + sessionAffinity: None + type: ClusterIP diff --git a/openshift/kustomize/services/auto-clipper/overlays/dev/kustomization.yaml b/openshift/kustomize/services/auto-clipper/overlays/dev/kustomization.yaml new file mode 100644 index 0000000000..4ca71785f5 --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/overlays/dev/kustomization.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: 9b301c-dev + +resources: + - ../../base + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: + - name: azure-speech-services + type: stringData + env: azure.env + - name: azure-openai + type: stringData + env: openai.env + +patches: + - target: + kind: Deployment + name: auto-clipper-service + patch: |- + - op: replace + path: /spec/replicas + value: 2 + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: 20m + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: 100Mi + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: 50m + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: 300Mi + - op: replace + path: /spec/template/spec/containers/0/image + value: image-registry.openshift-image-registry.svc:5000/9b301c-tools/auto-clipper-service:dev diff --git a/openshift/kustomize/services/auto-clipper/overlays/prod/kustomization.yaml b/openshift/kustomize/services/auto-clipper/overlays/prod/kustomization.yaml new file mode 100644 index 0000000000..d783baf42f --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/overlays/prod/kustomization.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: 9b301c-prod + +resources: + - ../../base + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: + - name: azure-speech-services + type: stringData + env: azure.env + - name: azure-openai + type: stringData + env: openai.env + +patches: + - target: + kind: Deployment + name: auto-clipper-service + patch: |- + - op: replace + path: /spec/replicas + value: 6 + - op: replace + path: /metadata/annotations/image.openshift.io~1triggers + value: '[{"from": {"kind": "ImageStreamTag", "name": "auto-clipper-service:prod", "namespace": "9b301c-tools"}, "fieldPath": "spec.template.spec.containers[?(@.name==\"auto-clipper-service\")].image"}]' + - op: replace + path:
/spec/template/spec/containers/0/resources/requests/cpu + value: 20m + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: 80Mi + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: 50m + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: 2Gi diff --git a/openshift/kustomize/services/auto-clipper/overlays/test/kustomization.yaml b/openshift/kustomize/services/auto-clipper/overlays/test/kustomization.yaml new file mode 100644 index 0000000000..036910c0cb --- /dev/null +++ b/openshift/kustomize/services/auto-clipper/overlays/test/kustomization.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: 9b301c-test + +resources: + - ../../base + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: + - name: azure-speech-services + type: stringData + env: azure.env + - name: azure-openai + type: stringData + env: openai.env + +patches: + - target: + kind: Deployment + name: auto-clipper-service + patch: |- + - op: replace + path: /spec/replicas + value: 1 + - op: replace + path: /spec/template/spec/containers/0/image + value: image-registry.openshift-image-registry.svc:5000/9b301c-tools/auto-clipper-service:test + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: 20m + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: 80Mi + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: 75m + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: 300Mi diff --git a/openshift/kustomize/services/build/base/auto-clipper.yaml b/openshift/kustomize/services/build/base/auto-clipper.yaml new file mode 100644 index 0000000000..6f0756dd2c --- /dev/null +++ b/openshift/kustomize/services/build/base/auto-clipper.yaml @@ -0,0 +1,59 @@ +--- +# The final build image. +kind: ImageStream +apiVersion: image.openshift.io/v1 +metadata: + name: auto-clipper-service + annotations: + description: Destination for built images. + created-by: jeremy.foster + labels: + name: auto-clipper-service + part-of: tno + version: 1.0.0 + component: auto-clipper + managed-by: kustomize + +--- +# The build config that will be created will be named for the branch you created it for. +kind: BuildConfig +apiVersion: build.openshift.io/v1 +metadata: + name: auto-clipper-service.dev + annotations: + description: Build image from Dockerfile in git repo. 
+ created-by: jeremy.foster + labels: + name: auto-clipper-service + part-of: tno + version: 1.0.0 + component: auto-clipper + managed-by: kustomize + branch: dev +spec: + completionDeadlineSeconds: 1800 + triggers: + - type: ImageChange + - type: ConfigChange + runPolicy: Serial + source: + git: + uri: https://github.com/bcgov/tno.git + ref: dev + contextDir: ./ + strategy: + type: Docker + dockerStrategy: + imageOptimizationPolicy: SkipLayers + dockerfilePath: services/net/auto-clipper/Dockerfile + output: + to: + kind: ImageStreamTag + name: auto-clipper-service:latest + resources: + requests: + cpu: 20m + memory: 250Mi + limits: + cpu: 500m + memory: 2Gi diff --git a/openshift/kustomize/tekton/base/tasks/build-all.yaml b/openshift/kustomize/tekton/base/tasks/build-all.yaml index b4dda092e1..22332314f9 100644 --- a/openshift/kustomize/tekton/base/tasks/build-all.yaml +++ b/openshift/kustomize/tekton/base/tasks/build-all.yaml @@ -242,6 +242,13 @@ spec: [dockerfile]="/tools/elastic/migration/Dockerfile" ) + declare -A COMPONENT21=( + [id]="auto-clipper" + [image]="auto-clipper-service" + [context]="" + [dockerfile]="/services/net/auto-clipper/Dockerfile" + ) + declare -n component; # Loop through each component and build if required. diff --git a/openshift/kustomize/tekton/base/tasks/deploy-all-deployment.yaml b/openshift/kustomize/tekton/base/tasks/deploy-all-deployment.yaml index e846c4ff9e..4eb4c9018c 100644 --- a/openshift/kustomize/tekton/base/tasks/deploy-all-deployment.yaml +++ b/openshift/kustomize/tekton/base/tasks/deploy-all-deployment.yaml @@ -32,15 +32,15 @@ spec: - name: TIMEOUT description: The timeout before it will stop waiting for the pod to become available. type: string - default: '600s' + default: "600s" - name: DEPLOY description: Whether to deploy a component, or all [*|all|[name]]. - default: '*' + default: "*" - name: WAIT description: Whether to wait for each service pod to scale back up [yes|no]. - default: 'no' + default: "no" workspaces: - name: conditions mountPath: /data @@ -270,6 +270,16 @@ spec: [env]="dev test prod" ) + declare -A COMPONENT20=( + [id]="auto-clipper" + [name]="auto-clipper-service" + [type]="deployment" + [replicas]="1" + [action]="stop" + [build]="yes" + [env]="dev test prod" + ) + declare -n component; # *************************************************** diff --git a/openshift/kustomize/tekton/base/tasks/git-conditions.yaml b/openshift/kustomize/tekton/base/tasks/git-conditions.yaml index 9bca920515..a367cb053a 100644 --- a/openshift/kustomize/tekton/base/tasks/git-conditions.yaml +++ b/openshift/kustomize/tekton/base/tasks/git-conditions.yaml @@ -177,6 +177,7 @@ spec: build_service 'scheduler' build_service 'syndication' build_service 'transcription' + build_service 'auto-clipper' echo "Build the following component." 
cat build.env | xargs echo diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 0f33315f63..74f1cf3146 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -13,10 +13,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/syndication/.env @@ -47,10 +47,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/fileupload/.env @@ -81,10 +81,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/image/.env @@ -117,10 +117,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 500M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/filemonitor/.env @@ -153,10 +153,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/content/.env @@ -189,10 +189,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/contentmigration/.env @@ -226,10 +226,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/indexing/.env @@ -261,10 +261,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/transcription/.env @@ -284,6 +284,42 @@ services: retries: 3 start_period: 30s + auto-clipper: + image: tno:auto-clipper + profiles: + - all + - service + restart: "no" + container_name: tno-auto-clipper + build: + context: ./ + dockerfile: services/net/auto-clipper/Dockerfile + deploy: + resources: + limits: + cpus: "0.25" + memory: 250M + reservations: + cpus: "0.05" + memory: 50M + env_file: + - services/net/auto-clipper/.env + ports: + - ${AUTO_CLIPPER_PORT:-40029}:8081 + depends_on: + - api + - broker + networks: + - tno + volumes: + - tno-api-data:/data + healthcheck: + test: curl -s -f http://localhost:8081/health || exit 1 + interval: 1m + timeout: 10s + retries: 3 + start_period: 30s + nlp: image: tno:nlp profiles: @@ -297,10 +333,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/nlp/.env @@ -331,10 +367,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/notification/.env @@ -367,10 +403,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/reporting/.env @@ -403,10 +439,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/folder-collection/.env @@ -424,7 +460,6 @@ services: retries: 3 start_period: 30s - extract-quotes: image: tno:extract-quotes profiles: @@ -438,10 +473,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: 
"0.05" memory: 50M env_file: - services/net/extract-quotes/.env @@ -466,10 +501,10 @@ services: deploy: resources: limits: - cpus: '1' + cpus: "1" memory: 2G reservations: - cpus: '0.05' + cpus: "0.05" memory: 100M env_file: - services/net/ffmpeg/.env @@ -502,10 +537,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/scheduler/.env @@ -536,10 +571,10 @@ services: deploy: resources: limits: - cpus: '0.25' + cpus: "0.25" memory: 250M reservations: - cpus: '0.05' + cpus: "0.05" memory: 50M env_file: - services/net/event-handler/.env diff --git a/services/net/TNO.Services.sln b/services/net/TNO.Services.sln index 58989e2899..1598af4791 100644 --- a/services/net/TNO.Services.sln +++ b/services/net/TNO.Services.sln @@ -87,8 +87,16 @@ Global {E99C7AE8-6388-412C-9C35-F7148E2B5DFE}.Debug|Any CPU.Build.0 = Debug|Any CPU {E99C7AE8-6388-412C-9C35-F7148E2B5DFE}.Release|Any CPU.ActiveCfg = Release|Any CPU {E99C7AE8-6388-412C-9C35-F7148E2B5DFE}.Release|Any CPU.Build.0 = Release|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Debug|Any CPU.Build.0 = Debug|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Release|Any CPU.ActiveCfg = Release|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Debug|Any CPU.Build.0 = Debug|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Release|Any CPU.ActiveCfg = Release|Any CPU + {60B05E2B-CD93-4D44-9FF0-F84135B40E03}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/services/net/auto-clipper/.dockerignore b/services/net/auto-clipper/.dockerignore new file mode 100644 index 0000000000..7ed9d732a6 --- /dev/null +++ b/services/net/auto-clipper/.dockerignore @@ -0,0 +1,18 @@ +.vs/ +.env + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +build/ +bld/ +**/[Bb]in/ +**/[Oo]bj/ +**/[Oo]ut/ +msbuild.log +msbuild.err +msbuild.wrn diff --git a/services/net/auto-clipper/Audio/AudioNormalizer.cs b/services/net/auto-clipper/Audio/AudioNormalizer.cs new file mode 100644 index 0000000000..62c5b0470e --- /dev/null +++ b/services/net/auto-clipper/Audio/AudioNormalizer.cs @@ -0,0 +1,69 @@ +using System.Diagnostics; +using Microsoft.Extensions.Logging; + +namespace TNO.Services.AutoClipper.Audio; + +/// +/// AudioNormalizer class, provides a way to normalize files to ensure we only send wav files for transcription. +/// +public class AudioNormalizer : IAudioNormalizer +{ + private readonly ILogger _logger; + + public AudioNormalizer(ILogger logger) + { + _logger = logger; + } + + /// + /// Create a new wav file if required. + /// + /// + /// + /// + /// + /// + public async Task NormalizeAsync(string sourceFile, int targetSampleRate, CancellationToken? cancellationToken = default) + { + var directory = Path.GetDirectoryName(sourceFile) ?? 
"."; + var normalizedFile = Path.Combine(directory, $"{Path.GetFileNameWithoutExtension(sourceFile)}.{targetSampleRate}hz.normalized.wav"); + + if (File.Exists(normalizedFile)) + { + var sourceInfo = new FileInfo(sourceFile); + var normalizedInfo = new FileInfo(normalizedFile); + if (normalizedInfo.LastWriteTimeUtc >= sourceInfo.LastWriteTimeUtc) + { + _logger.LogDebug("Using existing normalized file {File}", normalizedFile); + return normalizedFile; + } + } + + _logger.LogInformation("Normalizing audio {Source} -> {Dest}", sourceFile, normalizedFile); + Directory.CreateDirectory(directory); + + var process = new Process(); + if (OperatingSystem.IsWindows()) + { + process.StartInfo.FileName = "cmd"; + process.StartInfo.Arguments = $"/c ffmpeg -y -i \"{sourceFile}\" -ar {targetSampleRate} -ac 1 -c:a pcm_s16le \"{normalizedFile}\""; + } + else + { + process.StartInfo.FileName = "/bin/sh"; + process.StartInfo.Arguments = $"-c \"ffmpeg -y -i '{sourceFile}' -ar {targetSampleRate} -ac 1 -c:a pcm_s16le '{normalizedFile}' 2>&1\""; + } + process.StartInfo.UseShellExecute = false; + process.StartInfo.RedirectStandardOutput = true; + process.StartInfo.CreateNoWindow = true; + process.Start(); + var output = await process.StandardOutput.ReadToEndAsync(); + await process.WaitForExitAsync(cancellationToken ?? default); + if (process.ExitCode != 0) + { + _logger.LogError("ffmpeg normalization failed: {Output}", output); + throw new InvalidOperationException($"Failed to normalize audio: {sourceFile}"); + } + return normalizedFile; + } +} diff --git a/services/net/auto-clipper/Audio/IAudioNormalizer.cs b/services/net/auto-clipper/Audio/IAudioNormalizer.cs new file mode 100644 index 0000000000..845131fea9 --- /dev/null +++ b/services/net/auto-clipper/Audio/IAudioNormalizer.cs @@ -0,0 +1,6 @@ +namespace TNO.Services.AutoClipper.Audio; + +public interface IAudioNormalizer +{ + Task NormalizeAsync(string sourceFile, int targetSampleRate, CancellationToken? cancellationToken = default); +} diff --git a/services/net/auto-clipper/AutoClipperManager.cs b/services/net/auto-clipper/AutoClipperManager.cs new file mode 100644 index 0000000000..2f4ca65615 --- /dev/null +++ b/services/net/auto-clipper/AutoClipperManager.cs @@ -0,0 +1,683 @@ +using System.Globalization; +using System.Text; +using Confluent.Kafka; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using TNO.API.Areas.Services.Models.Content; +using TNO.Ches; +using TNO.Ches.Configuration; +using TNO.Core.Exceptions; +using TNO.Core.Extensions; +using TNO.Core.Storage; +using TNO.Entities; +using TNO.Kafka; +using TNO.Kafka.Models; +using TNO.Services.AutoClipper.Azure; +using TNO.Services.AutoClipper.Config; +using TNO.Services.AutoClipper.Exceptions; +using TNO.Services.AutoClipper.LLM; +using TNO.Services.AutoClipper.Pipeline; +using TNO.Services.Managers; +using TNO.Services.Runners; + +namespace TNO.Services.AutoClipper; + +/// +/// AutoClipperManager class, provides a Kafka Consumer service which imports audio from all active topics. +/// +public class AutoClipperManager : ServiceManager +{ + #region Variables + private readonly IS3StorageService _s3StorageService; + private readonly ClipProcessingPipeline _processingPipeline; + private readonly IStationConfigurationService _stationConfiguration; + private CancellationTokenSource? _cancelToken; + private Task? 
_consumer; + private readonly TaskStatus[] _notRunning = new TaskStatus[] { TaskStatus.Canceled, TaskStatus.Faulted, TaskStatus.RanToCompletion }; + private readonly WorkOrderStatus[] _ignoreWorkOrders = new WorkOrderStatus[] { WorkOrderStatus.Completed, WorkOrderStatus.Cancelled }; + private int _retries = 0; + private string? _etag = null; + private API.Areas.Editor.Models.Tag.TagModel[]? _tags = []; + #endregion + + #region Properties + /// + /// get - Kafka Consumer object. + /// + protected IKafkaListener Listener { get; private set; } + #endregion + + #region Constructors + /// + /// Creates a new instance of a AutoClipperManager object, initializes with specified parameters. + /// + /// + /// + /// + /// + /// + /// + /// + public AutoClipperManager( + IKafkaListener listener, + ClipProcessingPipeline processingPipeline, + IStationConfigurationService stationConfigurationService, + IApiService api, + IChesService chesService, + IOptions chesOptions, + IOptions options, + ILogger logger, + IS3StorageService s3StorageService) + : base(api, chesService, chesOptions, options, logger) + { + this.Listener = listener; + this.Listener.IsLongRunningJob = true; + this.Listener.OnError += ListenerErrorHandler; + this.Listener.OnStop += ListenerStopHandler; + _s3StorageService = s3StorageService; + _processingPipeline = processingPipeline; + _stationConfiguration = stationConfigurationService; + } + #endregion + + #region Methods + /// + /// Listen to active topics and import content. + /// + /// + public override async Task RunAsync() + { + var delay = this.Options.DefaultDelayMS; + + // Always keep looping until an unexpected failure occurs. + while (true) + { + if (this.State.Status == ServiceStatus.RequestSleep || this.State.Status == ServiceStatus.RequestPause || this.State.Status == ServiceStatus.RequestFailed) + { + // An API request or failures have requested the service to stop. + this.Logger.LogInformation("The service is stopping: '{Status}'", this.State.Status); + this.State.Stop(); + + // The service is stopping or has stopped, consume should stop too. + this.Listener.Stop(); + } + else if (this.State.Status != ServiceStatus.Running) + { + this.Logger.LogDebug("The service is not running: '{Status}'", this.State.Status); + } + else + { + try + { + var topics = this.Options.Topics.Split(',', StringSplitOptions.RemoveEmptyEntries); + + if (topics.Length != 0) + { + await GetTagsAsync(); + this.Listener.Subscribe(topics); + ConsumeMessages(); + } + else if (topics.Length == 0) + { + this.Listener.Stop(); + } + } + catch (Exception ex) + { + this.Logger.LogError(ex, "Service had an unexpected failure."); + this.State.RecordFailure(); + await this.SendErrorEmailAsync("Service had an Unexpected Failure", ex); + } + } + + // The delay ensures we don't have a run away thread. + this.Logger.LogDebug("Service sleeping for {delay} ms", delay); + await Task.Delay(delay); + } + } + + /// + /// Get an array of tags from the API. + /// + /// + private async Task GetTagsAsync() + { + // Fetch tags once for all clips from the API. + var tagsResponse = await this.Api.GetTagsResponseWithEtagAsync(_etag ?? ""); + if (tagsResponse != null && tagsResponse.StatusCode == System.Net.HttpStatusCode.OK) + { + _tags = await this.Api.GetResponseDataAsync(tagsResponse); + _etag = this.Api.GetResponseEtag(tagsResponse); + } + } + + /// + /// Creates a new cancellation token. + /// Create a new Task if the prior one isn't running anymore. 
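+    /// Calling this repeatedly is safe: a new consumer task is only created when the previous one has completed, faulted, or been cancelled.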
+    /// </summary>
+    private void ConsumeMessages()
+    {
+        if (_consumer == null || _notRunning.Contains(_consumer.Status))
+        {
+            // Make sure the prior task is cancelled before creating a new one.
+            if (_cancelToken?.IsCancellationRequested == false)
+                _cancelToken?.Cancel();
+            _cancelToken = new CancellationTokenSource();
+            _consumer = Task.Run(ListenerHandlerAsync, _cancelToken.Token);
+        }
+    }
+
+    /// <summary>
+    /// Keep consuming messages from Kafka until the service stops running.
+    /// </summary>
+    /// <returns></returns>
+    private async Task ListenerHandlerAsync()
+    {
+        while (this.State.Status == ServiceStatus.Running &&
+            _cancelToken?.IsCancellationRequested == false)
+        {
+            await this.Listener.ConsumeAsync(HandleMessageAsync, _cancelToken.Token);
+        }
+
+        // The service is stopping or has stopped, consume should stop too.
+        this.Listener.Stop();
+    }
+
+    /// <summary>
+    /// The Kafka consumer has failed for some reason, need to record the failure.
+    /// Fatal or unexpected errors will result in a request to stop consuming.
+    /// </summary>
+    /// <param name="sender"></param>
+    /// <param name="e"></param>
+    /// <returns>True if the consumer should retry the message.</returns>
+    private void ListenerErrorHandler(object sender, ErrorEventArgs e)
+    {
+        // Only the first retry will count as a failure.
+        if (_retries == 0)
+            this.State.RecordFailure();
+
+        if (e.GetException() is ConsumeException consume)
+        {
+            if (consume.Error.IsFatal)
+                this.Listener.Stop();
+        }
+    }
+
+    /// <summary>
+    /// The Kafka consumer has stopped which means we need to also cancel the background task associated with it.
+    /// </summary>
+    /// <param name="sender"></param>
+    /// <param name="e"></param>
+    private void ListenerStopHandler(object sender, EventArgs e)
+    {
+        if (_consumer != null &&
+            !_notRunning.Contains(_consumer.Status) &&
+            _cancelToken != null && !_cancelToken.IsCancellationRequested)
+        {
+            _cancelToken.Cancel();
+        }
+    }
+
+    /// <summary>
+    /// Retrieve a file from storage and send to Microsoft Cognitive Services. Obtain
+    /// the transcription and update the content record accordingly.
+    /// </summary>
+    /// <param name="result"></param>
+    /// <returns></returns>
+    private async Task HandleMessageAsync(ConsumeResult<string, ClipRequestModel> result)
+    {
+        try
+        {
+            var request = result.Message.Value;
+            // The service has stopped, so too should consuming messages.
+            if (this.State.Status != ServiceStatus.Running)
+            {
+                this.Listener.Stop();
+                this.State.Stop();
+            }
+            else
+            {
+                var content = await this.Api.FindContentByIdAsync(request.ContentId);
+                if (content != null)
+                {
+                    // TODO: Handle multi-threading so that more than one transcription can be performed at a time.
+                    await ProcessClipRequestAsync(request, content);
+                }
+                else
+                {
+                    // Identify requests for transcription for content that does not exist.
+                    this.Logger.LogWarning("Content does not exist for this message. Key: {Key}, Content ID: {ContentId}", result.Message.Key, request.ContentId);
+                }
+
+                // Inform Kafka this message is completed.
+                this.Listener.Commit(result);
+                this.Listener.Resume();
+
+                // Successful run clears any errors.
+                this.State.ResetFailures();
+                _retries = 0;
+            }
+        }
+        catch (Exception ex)
+        {
+            if (ex is HttpClientRequestException httpEx)
+            {
+                this.Logger.LogError(ex, "HTTP exception while consuming. {response}", httpEx.Data["body"] ?? "");
+                await this.SendErrorEmailAsync("HTTP exception while consuming.
{response}", ex); + } + else + { + this.Logger.LogError(ex, "Failed to handle message"); + await this.SendErrorEmailAsync("Failed to handle message", ex); + } + ListenerErrorHandler(this, new ErrorEventArgs(ex)); + } + } + + /// + /// Get local temp directory + /// + /// + private string GetTempDirectory() + { + var tempPath = Path.Join(this.Options.VolumePath, "temp".MakeRelativePath()); + if (!Directory.Exists(tempPath)) + { + Directory.CreateDirectory(tempPath); + } + return tempPath; + } + + /// + /// Clean up temp files that are downloaded from s3 or generated from downloaded s3 file + /// + /// + private static void CleanupS3Files(params string[] files) + { + foreach (var file in files) + { + if (File.Exists(file)) + { + File.Delete(file); + } + } + } + + /// + /// Download S3 files + /// + /// + /// + private async Task DownloadS3File(string? s3Path) + { + if (!string.IsNullOrEmpty(s3Path)) + { + var tempDir = GetTempDirectory(); + var s3FileStream = await _s3StorageService.DownloadFromS3Async(s3Path); + if (s3FileStream != null) + { + var fileName = Path.GetFileName(s3Path); + var tmpFilePath = Path.Combine(tempDir, fileName); + if (File.Exists(tmpFilePath)) + { + File.Delete(tmpFilePath); + } + + using (var fileStream = new FileStream(tmpFilePath, FileMode.Create, FileAccess.Write)) + { + s3FileStream.CopyTo(fileStream); + this.Logger.LogDebug("S3 file {path} is downloaded to: {file}", s3Path, tmpFilePath); + return tmpFilePath; + } + } + else + { + this.Logger.LogError("Cannot download file {file} from S3", s3Path); + } + } + else + { + this.Logger.LogError("S3 file path is empty."); + } + return string.Empty; + } + + /// + /// Delete any files that were copied or generated. + /// + /// + /// + /// + /// + private static async Task CleanUpFilesAsync(IEnumerable generatedClipFiles, bool isSyncedToS3, string downloadedFile) + { + CleanupLocalFiles(generatedClipFiles); + CleanupTemporaryFiles(isSyncedToS3, downloadedFile); + } + + /// + /// Make a request to generate a transcription for the specified 'content'. + /// + /// + /// + /// + /// + private async Task ProcessClipRequestAsync(ClipRequestModel request, ContentModel content) + { + var requestContentId = request.ContentId; + this.Logger.LogInformation("Auto clipper request received. Content ID: {Id}", requestContentId); + + var contentFile = content.FileReferences.FirstOrDefault(); + var relativePath = contentFile?.Path; + var clipSourcePath = !string.IsNullOrWhiteSpace(relativePath) ? Path.Join(this.Options.VolumePath, relativePath.MakeRelativePath()) : string.Empty; + var isSyncedToS3 = contentFile?.IsSyncedToS3 == true; + var downloadedFile = string.Empty; + var generatedClipFiles = new List(); + + if (isSyncedToS3) + { + clipSourcePath = contentFile?.S3Path ?? string.Empty; + if (!string.IsNullOrWhiteSpace(contentFile?.S3Path)) + { + downloadedFile = await DownloadS3File(contentFile.S3Path); + if (!string.IsNullOrWhiteSpace(downloadedFile)) + { + clipSourcePath = downloadedFile; + } + } + } + + // If the file doesn't exist don't continue. + if (!File.Exists(clipSourcePath)) + { + this.Logger.LogError("File does not exist for content. Content ID: {Id}, Path: {Path}", requestContentId, clipSourcePath); + var workOrderFailedException = new FileMissingException(requestContentId, clipSourcePath ?? 
string.Empty); + await this.SendNoticeEmailAsync($"File missing for Content ID: {requestContentId}", workOrderFailedException); + await UpdateWorkOrderAsync(request, WorkOrderStatus.Failed, workOrderFailedException); + return; + } + + // When a work order has been cancelled don't continue. + var workOrder = await UpdateWorkOrderAsync(request, content.IsApproved ? WorkOrderStatus.Cancelled : WorkOrderStatus.InProgress); + var originalBody = content.Body; + if (workOrder?.Status != WorkOrderStatus.InProgress) + { + if (workOrder?.Status == WorkOrderStatus.Cancelled) + this.Logger.LogWarning("Work order has been cancelled. Content ID: {id}", requestContentId); + else + this.Logger.LogWarning("Request ignored because it does not have a work order. Content ID: {id}", requestContentId); + await CleanUpFilesAsync(generatedClipFiles, isSyncedToS3, downloadedFile); + return; + } + + // Send file to Azure Speech Services for transcription. + // Then send transcription to Azure Open AI to extract separate stories from the transcription. + var stationCode = content.Source?.Code ?? content.Source?.Name ?? "default"; + var stationProfile = _stationConfiguration.GetProfile(stationCode); + var targetSampleRate = stationProfile.Transcription.SampleRate > 0 ? stationProfile.Transcription.SampleRate : stationProfile.SampleRate; + var processingContext = new ClipProcessingContext( + clipSourcePath, + stationProfile, + request, + targetSampleRate); + var pipelineResult = await _processingPipeline.ExecuteAsync(processingContext, _cancelToken?.Token ?? CancellationToken.None); + var segments = pipelineResult.Segments; + var clipDefinitions = pipelineResult.ClipDefinitions?.OrderBy(c => c.Start).ToArray() ?? []; + + // If there were no segments in the transcript don't continue. + if (segments.Count == 0) + { + var exception = new EmptyTranscriptException(requestContentId); + await UpdateWorkOrderAsync(request, WorkOrderStatus.Failed, exception); + await CleanUpFilesAsync(generatedClipFiles, isSyncedToS3, downloadedFile); + return; + } + + // If the work order has been cancelled, don't continue. + workOrder = await this.Api.FindWorkOrderAsync(workOrder.Id); + if (workOrder?.Status == WorkOrderStatus.Cancelled) + { + this.Logger.LogWarning("Work order has been cancelled during processing. Content ID: {id}", requestContentId); + await CleanUpFilesAsync(generatedClipFiles, isSyncedToS3, downloadedFile); + return; + } + + // If content doesn't exist, don't continue. + content = (await this.Api.FindContentByIdAsync(requestContentId))!; + if (content == null) + { + var exception = new ContentNotFoundException(requestContentId); + await UpdateWorkOrderAsync(request, WorkOrderStatus.Failed, exception); + await CleanUpFilesAsync(generatedClipFiles, isSyncedToS3, downloadedFile); + return; + } + + // Generate the full transcript. + var transcriptBody = BuildTranscriptDocument(segments); + if (!string.Equals(originalBody, content.Body, StringComparison.Ordinal)) + this.Logger.LogWarning("Transcript will be overwritten. Content ID: {Id}", requestContentId); + + // If content transcript has been approved do not update the transcript on the original story. + if (!content.IsApproved) + { + content.Body = transcriptBody; + await this.Api.UpdateContentAsync(content); + this.Logger.LogInformation("Primary transcript updated. Content ID: {Id}", requestContentId); + } + + // For each separate story identified, create a new story and send it to the API. 
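+        // Worked illustration (hypothetical numbers): a definition of 00:00:05-00:00:42
+        // over segments [00:00:04.2-00:00:12.8, ..., 00:00:39.1-00:00:44.0] is snapped by
+        // NormalizeClipDefinition to 00:00:04.2-00:00:44.0, so ffmpeg cuts on transcript
+        // segment boundaries rather than on the raw timestamps returned by the LLM.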
+ for (var clipIndex = 0; clipIndex < clipDefinitions.Length; clipIndex++) + { + try + { + var definition = clipDefinitions[clipIndex]; + var normalized = NormalizeClipDefinition(definition, segments); + if (normalized == null) + { + this.Logger.LogWarning("Skipped invalid clip definition for content {Id}", requestContentId); + continue; + } + + var clipTranscriptSegments = ExtractTranscriptRange(segments, normalized.Start, normalized.End); + if (clipTranscriptSegments.Length == 0) + { + this.Logger.LogWarning("No transcript rows found for clip {clip}", definition.Title); + continue; + } + + var clipTranscript = BuildTranscriptDocument(clipTranscriptSegments); + if (string.IsNullOrWhiteSpace(clipTranscript)) + { + this.Logger.LogWarning("Empty transcript for clip definition {clip}", definition.Title); + continue; + } + + var clipPath = await CreateClipFileAsync(clipSourcePath, $"autoclip_{requestContentId}", normalized.Start, normalized.End); + generatedClipFiles.Add(clipPath); + + await CreateClipContentAsync(content, normalized, clipTranscript, clipPath, clipIndex); + } + catch (Exception ex) + { + this.Logger.LogError(ex, "Failed to generate clip media for content {Id}", requestContentId); + continue; + } + } + + await UpdateWorkOrderAsync(request, WorkOrderStatus.Completed); + await CleanUpFilesAsync(generatedClipFiles, isSyncedToS3, downloadedFile); + } + + private static ClipDefinition? NormalizeClipDefinition(ClipDefinition definition, IReadOnlyList segments) + { + if (segments.Count == 0) return null; + var maxEnd = segments[^1].End; + var start = definition.Start < TimeSpan.Zero ? TimeSpan.Zero : definition.Start; + var end = definition.End > maxEnd ? maxEnd : definition.End; + if (end <= start) return null; + + var first = segments.FirstOrDefault(s => s.End > start); + var last = segments.LastOrDefault(s => s.Start < end); + if (first == null || last == null) return null; + start = first.Start; + end = last.End; + if (end <= start) return null; + + return definition with { Start = start, End = end }; + } + + private static TimestampedTranscript[] ExtractTranscriptRange(IReadOnlyList segments, TimeSpan start, TimeSpan end) + { + return [.. segments.Where(s => s.End > start && s.Start < end)]; + } + + private static async Task CreateClipFileAsync(string srcFile, string outputPrefix, TimeSpan start, TimeSpan end) + { + var directory = Path.GetDirectoryName(srcFile) ?? 
Path.GetTempPath();
+        var ext = Path.GetExtension(srcFile);
+        var clipName = $"{outputPrefix}_{Guid.NewGuid():N}{ext}";
+        var clipPath = Path.Combine(directory, clipName);
+        var duration = Math.Max(1, (end - start).TotalSeconds);
+        var process = new System.Diagnostics.Process();
+        if (OperatingSystem.IsWindows())
+        {
+            process.StartInfo.FileName = "cmd";
+            process.StartInfo.Arguments = $"/c ffmpeg -y -ss {start.TotalSeconds.ToString("0.###", CultureInfo.InvariantCulture)} -i \"{srcFile}\" -t {duration.ToString("0.###", CultureInfo.InvariantCulture)} -c copy \"{clipPath}\"";
+        }
+        else
+        {
+            process.StartInfo.FileName = "/bin/sh";
+            process.StartInfo.Arguments = $"-c \"ffmpeg -y -ss {start.TotalSeconds.ToString("0.###", CultureInfo.InvariantCulture)} -i '{srcFile}' -t {duration.ToString("0.###", CultureInfo.InvariantCulture)} -c copy '{clipPath}' 2>&1\"";
+        }
+        process.StartInfo.UseShellExecute = false;
+        process.StartInfo.RedirectStandardOutput = true;
+        process.StartInfo.CreateNoWindow = true;
+        process.Start();
+        var output = await process.StandardOutput.ReadToEndAsync();
+        await process.WaitForExitAsync();
+        if (process.ExitCode != 0)
+            throw new InvalidOperationException($"ffmpeg failed to create clip: {output}");
+        return clipPath;
+    }
+
+    private async Task CreateClipContentAsync(ContentModel parentContent, ClipDefinition definition, string transcriptBody, string clipPath, int clipIndex)
+    {
+        var clipContent = BuildClipContentModel(parentContent, definition, transcriptBody, clipIndex);
+        var created = await this.Api.AddContentAsync(clipContent);
+        if (created == null) return;
+
+        await using var clipStream = File.OpenRead(clipPath);
+        created = await this.Api.UploadFileAsync(created.Id, created.Version ?? 0, clipStream, Path.GetFileName(clipPath));
+        this.Logger.LogInformation("Clip content created. Parent Content: {ParentId}, Clip Content: {ClipId}", parentContent.Id, created?.Id);
+    }
+
+    private ContentModel BuildClipContentModel(ContentModel sourceContent, ClipDefinition definition, string transcriptBody, int clipIndex)
+    {
+        var clipSummary = string.IsNullOrWhiteSpace(definition.Summary)
+            ? $"Clip covering {FormatTimestamp(definition.Start)} to {FormatTimestamp(definition.End)}"
+            : definition.Summary;
+
+        var autoTags = _tags?.Where(t => this.Options.ApplyTags.Contains(t.Code));
+        var tags = autoTags != null ? sourceContent.Tags.AppendRange(autoTags.Select(at => new ContentTagModel(at.Id, at.Code, at.Name))) : sourceContent.Tags;
+
+        return new ContentModel
+        {
+            ContentType = sourceContent.ContentType,
+            SourceId = sourceContent.SourceId,
+            Source = sourceContent.Source,
+            OtherSource = sourceContent.OtherSource,
+            SeriesId = sourceContent.SeriesId,
+            Series = sourceContent.Series,
+            MediaTypeId = sourceContent.MediaTypeId,
+            LicenseId = sourceContent.LicenseId,
+            OwnerId = sourceContent.OwnerId,
+            ContributorId = sourceContent.ContributorId,
+            Contributor = sourceContent.Contributor,
+            Byline = sourceContent.Byline,
+            Status = ContentStatus.Draft,
+            Uid = BaseService.GetContentHash(sourceContent.Source?.Code ??
"AutoClipper", $"{sourceContent.Uid}-clip-{clipIndex}", sourceContent.PublishedOn), + Headline = $"{definition.Title}", + Summary = $"[AutoClipper:{definition.Category}]\n{clipSummary}", + Body = transcriptBody, + SourceUrl = sourceContent.SourceUrl, + PublishedOn = sourceContent.PublishedOn, + PostedOn = DateTime.UtcNow, + Tags = tags, + Topics = sourceContent.Topics, + Actions = sourceContent.Actions, + Labels = sourceContent.Labels, + IsHidden = false + }; + } + + private static void CleanupLocalFiles(IEnumerable files) + { + CleanupS3Files(files.Where(f => !string.IsNullOrWhiteSpace(f)).ToArray()); + } + + private static void CleanupTemporaryFiles(bool isSyncedToS3, params string[] files) + { + if (!isSyncedToS3) return; + CleanupS3Files(files); + } + + /// + /// Format the transcript to include newlines. + /// + /// + /// + private static string BuildTranscriptDocument(IReadOnlyList segments) + { + if (segments == null || segments.Count == 0) return string.Empty; + + var sb = new StringBuilder(); + var index = 1; + foreach (var segment in segments) + { + if (string.IsNullOrWhiteSpace(segment.Text)) continue; + // sb.AppendLine(index.ToString(CultureInfo.InvariantCulture)); + // sb.AppendLine($"{FormatTimestamp(segment.Start)} --> {FormatTimestamp(segment.End)}"); + sb.AppendLine(segment.Text.Trim()); + sb.AppendLine(); + index++; + } + + return sb.ToString().Trim(); + } + + private static string FormatTimestamp(TimeSpan value) + { + var hours = (int)Math.Floor(value.TotalHours); + return string.Format(CultureInfo.InvariantCulture, "{0:00}:{1:00}:{2:00}.{3:000}", hours, value.Minutes, value.Seconds, value.Milliseconds); + } + + /// + /// Update the work order (if it exists) with the specified 'status'. + /// + /// + /// + /// Whether a work order exists or is not required. + private async Task UpdateWorkOrderAsync(ClipRequestModel request, WorkOrderStatus status, Exception? reason = null) + { + if (request.WorkOrderId > 0) + { + var workOrder = await this.Api.FindWorkOrderAsync(request.WorkOrderId); + if (workOrder != null && !_ignoreWorkOrders.Contains(workOrder.Status)) + { + workOrder.Status = status; + workOrder = await this.Api.UpdateWorkOrderAsync(workOrder); + + if (status == WorkOrderStatus.Failed && reason != null) + { + await this.SendErrorEmailAsync($"Work order failed for Content ID: {request.ContentId}", reason); + this.Logger.LogError(reason, "Work order failed for Content ID: {ContentId}", request.ContentId); + } + } + return workOrder; + } + return null; + } + #endregion +} + diff --git a/services/net/auto-clipper/AutoClipperService.cs b/services/net/auto-clipper/AutoClipperService.cs new file mode 100644 index 0000000000..843a408539 --- /dev/null +++ b/services/net/auto-clipper/AutoClipperService.cs @@ -0,0 +1,68 @@ +using Microsoft.Extensions.DependencyInjection; +using TNO.Core.Storage; +using TNO.Core.Storage.Configuration; +using TNO.Kafka; +using TNO.Kafka.Models; +using TNO.Services.AutoClipper.Audio; +using TNO.Services.AutoClipper.Azure; +using TNO.Services.AutoClipper.Config; +using TNO.Services.AutoClipper.LLM; +using TNO.Services.AutoClipper.Pipeline; +using TNO.Services.Runners; + + +namespace TNO.Services.AutoClipper; + +/// +/// AutoClipperService abstract class, provides a console application that runs service, and an api. +/// The AutoClipperService is a Kafka consumer which pulls content. 
+/// +public class AutoClipperService : KafkaConsumerService +{ + #region Variables + #endregion + + #region Properties + #endregion + + #region Constructors + /// + /// Creates a new instance of a AutoClipperService object, initializes with arguments. + /// + /// + public AutoClipperService(string[] args) : base(args) + { + } + #endregion + + #region Methods + /// + /// Configure dependency injection. + /// + /// + /// + protected override IServiceCollection ConfigureServices(IServiceCollection services) + { + base.ConfigureServices(services); + services + .Configure(this.Configuration.GetSection("Service")) + .AddTransient, KafkaListener>() + .AddSingleton() + .Configure(this.Configuration.GetSection("S3")) + .AddSingleton() + .AddSingleton() + .AddSingleton() + .AddSingleton(); + + services.AddSingleton(); + services.AddHttpClient(); + + // TODO: Figure out how to validate without resulting in aggregating the config values. + // services.AddOptions() + // .Bind(this.Configuration.GetSection("Service")) + // .ValidateDataAnnotations(); + + return services; + } + #endregion +} diff --git a/services/net/auto-clipper/Azure/AzureSpeechTranscriptionService.cs b/services/net/auto-clipper/Azure/AzureSpeechTranscriptionService.cs new file mode 100644 index 0000000000..9e618adb29 --- /dev/null +++ b/services/net/auto-clipper/Azure/AzureSpeechTranscriptionService.cs @@ -0,0 +1,408 @@ +using System.Globalization; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Xml; +using Azure.Storage.Blobs; +using Azure.Storage.Sas; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using TNO.Services.AutoClipper.Config; + +namespace TNO.Services.AutoClipper.Azure; + +/// +/// AzureSpeechTranscriptionService class, provides a way to send a file to Azure Speech service and get a transcription. +/// +public class AzureSpeechTranscriptionService : IAzureSpeechTranscriptionService +{ + private const string DefaultApiVersion = "v3.2"; + private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web) + { + PropertyNameCaseInsensitive = true + }; + + private readonly AutoClipperOptions _options; + private readonly ILogger _logger; + private readonly HttpClient _httpClient; + private readonly BlobContainerClient? _containerClient; + + public AzureSpeechTranscriptionService(HttpClient httpClient, IOptions options, ILogger logger) + { + _httpClient = httpClient ?? 
throw new ArgumentNullException(nameof(httpClient)); + _options = options.Value; + _logger = logger; + + if (!string.IsNullOrWhiteSpace(_options.AzureSpeechStorageConnectionString) && !string.IsNullOrWhiteSpace(_options.AzureSpeechStorageContainer)) + { + _containerClient = new BlobContainerClient(_options.AzureSpeechStorageConnectionString, _options.AzureSpeechStorageContainer); + Console.WriteLine($"Storage: {_containerClient?.AccountName}/{_containerClient?.Name}"); + } + } + + public async Task> TranscribeAsync(string filePath, SpeechTranscriptionRequest request, CancellationToken cancellationToken) + { + if (request == null) throw new ArgumentNullException(nameof(request)); + if (string.IsNullOrWhiteSpace(request.Language)) throw new ArgumentException("Speech recognition language must be provided.", nameof(request)); + if (string.IsNullOrWhiteSpace(filePath) || !File.Exists(filePath)) throw new FileNotFoundException("Audio file does not exist", filePath); + if (string.IsNullOrWhiteSpace(_options.AzureSpeechKey)) throw new InvalidOperationException("Azure Speech key is required for batch transcription."); + if (_containerClient == null) throw new InvalidOperationException("Azure Speech storage connection information must be configured for batch transcription."); + + await _containerClient.CreateIfNotExistsAsync(cancellationToken: cancellationToken).ConfigureAwait(false); + + var blobClient = await UploadAudioAsync(filePath, cancellationToken).ConfigureAwait(false); + try + { + var audioUrl = GenerateReadSasUri(blobClient); + var transcriptionUri = await CreateTranscriptionAsync(audioUrl, Path.GetFileName(filePath), request, cancellationToken).ConfigureAwait(false); + var finalState = await WaitForCompletionAsync(transcriptionUri, cancellationToken).ConfigureAwait(false); + var transcriptUrl = await ResolveTranscriptFileAsync(finalState, cancellationToken).ConfigureAwait(false); + var transcriptJson = await DownloadTranscriptAsync(transcriptUrl, cancellationToken).ConfigureAwait(false); + var segments = ParseTranscript(transcriptJson); + + if (segments.Length == 0) + _logger.LogWarning("Azure batch transcription produced no transcript entries for {File}", filePath); + + return segments; + } + finally + { + try + { + await blobClient.DeleteIfExistsAsync(cancellationToken: CancellationToken.None).ConfigureAwait(false); + } + catch (Exception ex) + { + _logger.LogDebug(ex, "Failed to clean up temporary blob {Blob}", blobClient.Name); + } + } + } + + private async Task UploadAudioAsync(string filePath, CancellationToken cancellationToken) + { + var blobName = $"{Guid.NewGuid():N}-{Path.GetFileName(filePath)}"; + var blobClient = _containerClient!.GetBlobClient(blobName); + await using var stream = File.OpenRead(filePath); + await blobClient.UploadAsync(stream, overwrite: true, cancellationToken).ConfigureAwait(false); + _logger.LogDebug("Uploaded {File} to Azure blob {Blob}", filePath, blobName); + return blobClient; + } + + private Uri GenerateReadSasUri(BlobClient blobClient) + { + if (!blobClient.CanGenerateSasUri) + throw new InvalidOperationException("Unable to generate SAS URI for Azure Speech batch transcription. Ensure the connection string includes an account key."); + + var minutes = _options.AzureSpeechStorageSasExpiryMinutes <= 0 ? 
120 : _options.AzureSpeechStorageSasExpiryMinutes; + var builder = new BlobSasBuilder + { + BlobContainerName = blobClient.BlobContainerName, + BlobName = blobClient.Name, + Resource = "b", + ExpiresOn = DateTimeOffset.UtcNow.AddMinutes(Math.Max(5, minutes)) + }; + builder.SetPermissions(BlobSasPermissions.Read); + return blobClient.GenerateSasUri(builder); + } + + private async Task CreateTranscriptionAsync(Uri contentUri, string fileName, SpeechTranscriptionRequest request, CancellationToken cancellationToken) + { + var payload = BuildTranscriptionPayload(contentUri, fileName, request); + var version = GetApiVersion(); + var uri = version.StartsWith("v", StringComparison.OrdinalIgnoreCase) + ? $"{GetSpeechEndpoint()}/speechtotext/{version}/transcriptions" + : $"{GetSpeechEndpoint()}/speechtotext/transcriptions?api-version={version}"; + Console.WriteLine($"POST {uri}"); + Console.WriteLine(JsonSerializer.Serialize(payload, new JsonSerializerOptions { WriteIndented = true })); + using var httpRequest = new HttpRequestMessage(HttpMethod.Post, uri); + ApplyHeaders(httpRequest); + httpRequest.Content = new StringContent(JsonSerializer.Serialize(payload, SerializerOptions), Encoding.UTF8, "application/json"); + + using var response = await _httpClient.SendAsync(httpRequest, cancellationToken).ConfigureAwait(false); + var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + if (!response.IsSuccessStatusCode) + throw new InvalidOperationException($"Azure Speech batch transcription request failed: {(int)response.StatusCode} {response.ReasonPhrase}. {body}"); + + Uri? location = response.Headers.Location; + if (location == null) + { + using var doc = JsonDocument.Parse(body); + if (doc.RootElement.TryGetProperty("self", out var node)) + Uri.TryCreate(node.GetString(), UriKind.Absolute, out location); + } + + if (location == null) + throw new InvalidOperationException("Azure Speech batch transcription response did not include an operation location."); + + _logger.LogInformation("Azure batch transcription created for {File}. Operation: {Operation}", fileName, location); + return EnsureApiVersion(location); + } + + private static object BuildTranscriptionPayload(Uri contentUri, string fileName, SpeechTranscriptionRequest request) + { + var properties = new Dictionary + { + ["wordLevelTimestampsEnabled"] = true, + ["punctuationMode"] = "DictatedAndAutomatic", + ["profanityFilterMode"] = "Masked" + }; + + if (request.EnableSpeakerDiarization) + { + properties["diarizationEnabled"] = true; + if (request.SpeakerCount.HasValue && request.SpeakerCount > 0) + { + properties["diarization"] = new + { + maxSpeakers = request.SpeakerCount.Value + }; + } + } + + return new + { + displayName = $"AutoClipper-{fileName}", + description = "AutoClipper batch transcription", + locale = request.Language, + contentUrls = new[] { contentUri.ToString() }, + properties + }; + } + + private async Task WaitForCompletionAsync(Uri transcriptionUri, CancellationToken cancellationToken) + { + var pollInterval = TimeSpan.FromSeconds(Math.Max(5, _options.AzureSpeechBatchPollingIntervalSeconds <= 0 ? 10 : _options.AzureSpeechBatchPollingIntervalSeconds)); + var timeout = TimeSpan.FromMinutes(Math.Max(5, _options.AzureSpeechBatchTimeoutMinutes <= 0 ? 
45 : _options.AzureSpeechBatchTimeoutMinutes)); + var expiry = DateTime.UtcNow + timeout; + + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + var transcription = await GetTranscriptionAsync(transcriptionUri, cancellationToken).ConfigureAwait(false); + if (transcription == null) + throw new InvalidOperationException("Azure Speech returned an empty transcription status response."); + + var status = transcription.Status ?? string.Empty; + if (status.Equals("succeeded", StringComparison.OrdinalIgnoreCase)) + return transcription; + + if (status.Equals("failed", StringComparison.OrdinalIgnoreCase)) + throw new InvalidOperationException($"Azure batch transcription failed: {BuildErrorMessage(transcription.Error)}"); + + if (DateTime.UtcNow >= expiry) + throw new TimeoutException("Azure batch transcription did not complete before the configured timeout."); + + _logger.LogDebug("Azure batch transcription status for {Id}: {Status}", transcription.Uri ?? transcriptionUri, status); + await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false); + } + } + + private async Task GetTranscriptionAsync(Uri uri, CancellationToken cancellationToken) + { + using var request = new HttpRequestMessage(HttpMethod.Get, EnsureApiVersion(uri)); + ApplyHeaders(request); + using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); + var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + if (!response.IsSuccessStatusCode) + throw new InvalidOperationException($"Azure Speech status request failed: {(int)response.StatusCode} {response.ReasonPhrase}. {body}"); + + var transcription = JsonSerializer.Deserialize(body, SerializerOptions); + return transcription; + } + + private async Task ResolveTranscriptFileAsync(BatchTranscription transcription, CancellationToken cancellationToken) + { + var filesLink = transcription.Links?.Files; + if (filesLink == null) + throw new InvalidOperationException("Azure Speech response did not include a files link."); + + using var request = new HttpRequestMessage(HttpMethod.Get, EnsureApiVersion(filesLink)); + ApplyHeaders(request); + using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); + var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + if (!response.IsSuccessStatusCode) + throw new InvalidOperationException($"Azure Speech file listing failed: {(int)response.StatusCode} {response.ReasonPhrase}. {body}"); + + var files = JsonSerializer.Deserialize(body, SerializerOptions); + var transcriptFile = files?.Values?.FirstOrDefault(f => string.Equals(f.Kind, "Transcription", StringComparison.OrdinalIgnoreCase) && f.Links?.ContentUrl != null); + if (transcriptFile?.Links?.ContentUrl == null) + throw new InvalidOperationException("Azure Speech file listing did not contain a transcription content URL."); + + return transcriptFile.Links.ContentUrl; + } + + private async Task DownloadTranscriptAsync(Uri transcriptUri, CancellationToken cancellationToken) + { + using var request = new HttpRequestMessage(HttpMethod.Get, transcriptUri); + using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); + response.EnsureSuccessStatusCode(); + return await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + } + + private static TimestampedTranscript[] ParseTranscript(string? 
json) + { + if (string.IsNullOrWhiteSpace(json)) return Array.Empty(); + + using var document = JsonDocument.Parse(json); + if (!document.RootElement.TryGetProperty("recognizedPhrases", out var phrases) || phrases.ValueKind != JsonValueKind.Array) + return Array.Empty(); + + var results = new List(); + foreach (var phrase in phrases.EnumerateArray()) + { + var start = ReadTimeSpan(phrase, "offset"); + var duration = ReadTimeSpan(phrase, "duration"); + var end = duration > TimeSpan.Zero ? start + duration : start; + var text = ReadPhraseText(phrase); + if (string.IsNullOrWhiteSpace(text)) continue; + if (end <= start) end = start + TimeSpan.FromMilliseconds(100); + results.Add(new TimestampedTranscript(start, end, text.Trim())); + } + + return results + .OrderBy(r => r.Start) + .ToArray(); + } + + private static string? ReadPhraseText(JsonElement phrase) + { + if (phrase.TryGetProperty("nBest", out var nBest) && nBest.ValueKind == JsonValueKind.Array) + { + foreach (var alt in nBest.EnumerateArray()) + { + if (alt.ValueKind != JsonValueKind.Object) continue; + if (alt.TryGetProperty("display", out var display) && display.ValueKind == JsonValueKind.String) + return display.GetString(); + if (alt.TryGetProperty("lexical", out var lexical) && lexical.ValueKind == JsonValueKind.String) + return lexical.GetString(); + } + } + + if (phrase.TryGetProperty("displayText", out var displayText) && displayText.ValueKind == JsonValueKind.String) + return displayText.GetString(); + if (phrase.TryGetProperty("text", out var text) && text.ValueKind == JsonValueKind.String) + return text.GetString(); + + return null; + } + + private static TimeSpan ReadTimeSpan(JsonElement element, string property) + { + if (!element.TryGetProperty(property, out var node)) return TimeSpan.Zero; + if (node.ValueKind == JsonValueKind.Number) + { + if (node.TryGetInt64(out var ticks)) return TimeSpan.FromTicks(ticks); + if (node.TryGetDouble(out var seconds)) return TimeSpan.FromSeconds(seconds); + } + else if (node.ValueKind == JsonValueKind.String) + { + var value = node.GetString(); + if (string.IsNullOrWhiteSpace(value)) return TimeSpan.Zero; + if (long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var ticks)) return TimeSpan.FromTicks(ticks); + if (double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var seconds)) return TimeSpan.FromSeconds(seconds); + if (TimeSpan.TryParse(value, CultureInfo.InvariantCulture, out var ts)) return ts; + if (TryParseIsoDuration(value, out var isoTs)) return isoTs; + } + + return TimeSpan.Zero; + } + + private static bool TryParseIsoDuration(string value, out TimeSpan result) + { + try + { + result = XmlConvert.ToTimeSpan(value); + return true; + } + catch (Exception) + { + result = TimeSpan.Zero; + return false; + } + } + + private string BuildErrorMessage(BatchTranscriptionError? error) + { + if (error == null) return "Unknown error"; + var builder = new StringBuilder(); + AppendError(builder, error); + return builder.ToString(); + } + + private void AppendError(StringBuilder builder, BatchTranscriptionError? 
error) + { + if (error == null) return; + if (builder.Length > 0) builder.Append(" -> "); + builder.Append(error.Code); + if (!string.IsNullOrWhiteSpace(error.Message)) + { + builder.Append(':').Append(' ').Append(error.Message); + } + if (error.InnerError != null) AppendError(builder, error.InnerError); + } + + private void ApplyHeaders(HttpRequestMessage request) + { + request.Headers.Remove("Ocp-Apim-Subscription-Key"); + request.Headers.Add("Ocp-Apim-Subscription-Key", _options.AzureSpeechKey); + } + + private Uri EnsureApiVersion(Uri uri) + { + if (uri == null) throw new ArgumentNullException(nameof(uri)); + if (!string.IsNullOrWhiteSpace(uri.Query) && uri.Query.Contains("api-version", StringComparison.OrdinalIgnoreCase)) + return uri; + + var builder = new UriBuilder(uri); + var query = builder.Query.TrimStart('?'); + var version = $"api-version={GetApiVersion()}"; + builder.Query = string.IsNullOrWhiteSpace(query) ? version : $"{query}&{version}"; + return builder.Uri; + } + + private string GetApiVersion() => string.IsNullOrWhiteSpace(_options.AzureSpeechBatchApiVersion) ? DefaultApiVersion : _options.AzureSpeechBatchApiVersion; + + private string GetSpeechEndpoint() + { + if (!string.IsNullOrWhiteSpace(_options.AzureSpeechBatchEndpoint)) + return _options.AzureSpeechBatchEndpoint.TrimEnd('/'); + if (!string.IsNullOrWhiteSpace(_options.AzureSpeechRegion)) + return $"https://{_options.AzureSpeechRegion}.api.cognitive.microsoft.com"; + throw new InvalidOperationException("Azure Speech region or endpoint must be configured."); + } + + private sealed record BatchTranscription( + [property: JsonPropertyName("self")] Uri? Uri, + [property: JsonPropertyName("status")] string? Status, + [property: JsonPropertyName("links")] BatchTranscriptionLinks? Links, + [property: JsonPropertyName("error")] BatchTranscriptionError? Error); + + private sealed record BatchTranscriptionLinks( + [property: JsonPropertyName("files")] Uri? Files, + [property: JsonPropertyName("content")] Uri? Content); + + private sealed record BatchTranscriptionError( + [property: JsonPropertyName("code")] string? Code, + [property: JsonPropertyName("message")] string? Message, + [property: JsonPropertyName("innerError")] BatchTranscriptionError? InnerError); + + private sealed record BatchTranscriptionFilesResponse( + [property: JsonPropertyName("values")] IReadOnlyList? Values); + + private sealed record BatchTranscriptionFile( + [property: JsonPropertyName("name")] string? Name, + [property: JsonPropertyName("kind")] string? Kind, + [property: JsonPropertyName("links")] BatchTranscriptionFileLinks? Links); + + private sealed record BatchTranscriptionFileLinks( + [property: JsonPropertyName("contentUrl")] Uri? 
ContentUrl); +} + + + + + + + + + diff --git a/services/net/auto-clipper/Azure/IAzureSpeechTranscriptionService.cs b/services/net/auto-clipper/Azure/IAzureSpeechTranscriptionService.cs new file mode 100644 index 0000000000..7894bf240f --- /dev/null +++ b/services/net/auto-clipper/Azure/IAzureSpeechTranscriptionService.cs @@ -0,0 +1,6 @@ +namespace TNO.Services.AutoClipper.Azure; + +public interface IAzureSpeechTranscriptionService +{ + Task> TranscribeAsync(string filePath, SpeechTranscriptionRequest request, CancellationToken cancellationToken); +} diff --git a/services/net/auto-clipper/Azure/IAzureVideoIndexerClient.cs b/services/net/auto-clipper/Azure/IAzureVideoIndexerClient.cs new file mode 100644 index 0000000000..b8742b3d2f --- /dev/null +++ b/services/net/auto-clipper/Azure/IAzureVideoIndexerClient.cs @@ -0,0 +1,11 @@ +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; + +namespace TNO.Services.AutoClipper.Azure; + +public interface IAzureVideoIndexerClient +{ + Task> GenerateTranscriptAsync(Stream stream, string fileName, string language, CancellationToken cancellationToken = default); +} diff --git a/services/net/auto-clipper/Azure/SpeechTranscriptionRequest.cs b/services/net/auto-clipper/Azure/SpeechTranscriptionRequest.cs new file mode 100644 index 0000000000..f0e7b7f556 --- /dev/null +++ b/services/net/auto-clipper/Azure/SpeechTranscriptionRequest.cs @@ -0,0 +1,9 @@ +namespace TNO.Services.AutoClipper.Azure; + +public class SpeechTranscriptionRequest +{ + public string Language { get; init; } = "en-US"; + public bool EnableSpeakerDiarization { get; init; } + public int? SpeakerCount { get; init; } + public string? DiarizationMode { get; init; } +} diff --git a/services/net/auto-clipper/Azure/TimestampedTranscript.cs b/services/net/auto-clipper/Azure/TimestampedTranscript.cs new file mode 100644 index 0000000000..02795aef9e --- /dev/null +++ b/services/net/auto-clipper/Azure/TimestampedTranscript.cs @@ -0,0 +1,5 @@ +using System; + +namespace TNO.Services.AutoClipper.Azure; + +public record TimestampedTranscript(TimeSpan Start, TimeSpan End, string Text); diff --git a/services/net/auto-clipper/Config/AutoClipperOptions.cs b/services/net/auto-clipper/Config/AutoClipperOptions.cs new file mode 100644 index 0000000000..cee1d3a4e2 --- /dev/null +++ b/services/net/auto-clipper/Config/AutoClipperOptions.cs @@ -0,0 +1,131 @@ +using System.ComponentModel.DataAnnotations; +using TNO.Services.Config; + +namespace TNO.Services.AutoClipper.Config; + +public class AutoClipperOptions : ServiceOptions +{ + /// + /// get/set - The topic name in Kafka to subscriber for work orders. + /// + public string Topics { get; set; } = "request-clips"; + + /// + /// get/set - The path to the mapped volume containing the files. + /// + public string VolumePath { get; set; } = ""; + + /// + /// get/set - An array of tag codes to add to content when it is created. + /// + public string[] ApplyTags { get; set; } = []; + + /// + /// get/set - Path to the location station configuration files are stored. + /// + public string StationConfigPath { get; set; } = Path.Combine("Config", "Stations"); + + /// + /// get/set - The maximum number of stories to generate from a clip. + /// + public int MaxStoriesFromClip { get; set; } = 5; + + #region Azure Speech Service configuration + /// + /// get/set - The API key to use Azure Speech services. 
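+    /// Typically populated from the azure-speech-services secret (azure.env) referenced by the OpenShift overlays.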
+ /// + public string AzureSpeechKey { get; set; } = ""; + + /// + /// get/set - The region the Azure speech services is deployed. + /// + public string AzureSpeechRegion { get; set; } = ""; + + /// + /// get/set - The URL endpoint to the batch service. + /// + public string AzureSpeechBatchEndpoint { get; set; } = string.Empty; + + /// + /// get/set - The API version. + /// + public string AzureSpeechBatchApiVersion { get; set; } = "v3.2"; + + /// + /// get/set - Configure Azure speech services. + /// + public int AzureSpeechBatchPollingIntervalSeconds { get; set; } = 10; + + /// + /// get/set - Configure Azure speech services. + /// + public int AzureSpeechBatchTimeoutMinutes { get; set; } = 45; + + /// + /// get/set - Configure Azure speech services. + /// + public string DefaultTranscriptLanguage { get; set; } = "en-US"; + + /// + /// get/set - Configure Azure speech services. + /// + public int AzureSpeechMaxRetries { get; set; } = 3; + + /// + /// get/set - Configure Azure speech services. + /// + public int AzureSpeechRetryDelaySeconds { get; set; } = 5; + #endregion + + #region Azure Storage configuration + /// + /// get/set - Configure Azure storage connection string. + /// + public string AzureSpeechStorageConnectionString { get; set; } = string.Empty; + + /// + /// get/set - Configure Azure speech services. + /// + public string AzureSpeechStorageContainer { get; set; } = string.Empty; + + /// + /// get/set - Configure Azure storage services. + /// + public int AzureSpeechStorageSasExpiryMinutes { get; set; } = 180; + #endregion + + #region Azure AI configuration + /// + /// get/set - The URL to the LLM + /// + [Required] + public Uri LlmApiUrl { get; set; } = default!; + + /// + /// get/set - The API key + /// + [Required] + public string LlmApiKey { get; set; } = ""; + + /// + /// get/set - The LLM model to use. + /// + public string LlmDefaultModel { get; set; } = ""; + + /// + /// get/set - The Default LLM prompt. + /// + public string LlmPrompt { get; set; } = string.Empty; + + /// + /// get/set - The maximum prompt character limit. + /// + public int LlmPromptCharacterLimit { get; set; } = 0; + + /// + /// get/set - The LLM boundary score threshold. + /// + public double LlmBoundaryScoreThreshold { get; set; } = 0.55; + #endregion + +} diff --git a/services/net/auto-clipper/Config/StationConfigurationService.cs b/services/net/auto-clipper/Config/StationConfigurationService.cs new file mode 100644 index 0000000000..bc8725b006 --- /dev/null +++ b/services/net/auto-clipper/Config/StationConfigurationService.cs @@ -0,0 +1,76 @@ +using System.Collections.Concurrent; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using YamlDotNet.Serialization; +using YamlDotNet.Serialization.NamingConventions; + +namespace TNO.Services.AutoClipper.Config; + +public interface IStationConfigurationService +{ + StationProfile GetProfile(string? stationCode); +} + +public class StationConfigurationService : IStationConfigurationService +{ + private readonly ILogger _logger; + private readonly ConcurrentDictionary _profiles = new(StringComparer.OrdinalIgnoreCase); + private readonly StationProfile _defaultProfile = new(); + + public StationConfigurationService(IOptions options, ILogger logger) + { + _logger = logger; + LoadProfiles(options.Value.StationConfigPath); + } + + public StationProfile GetProfile(string? 
+    public StationProfile GetProfile(string? stationCode)
+    {
+        if (!string.IsNullOrWhiteSpace(stationCode) && _profiles.TryGetValue(stationCode, out var profile))
+            return profile;
+        if (_profiles.TryGetValue("default", out var defaultProfile))
+            return defaultProfile;
+        return _defaultProfile;
+    }
+
+    private void LoadProfiles(string? path)
+    {
+        if (string.IsNullOrWhiteSpace(path)) { _logger.LogWarning("StationConfigPath not set; using defaults"); return; }
+        var basePath = Path.IsPathRooted(path) ? path : Path.Combine(AppContext.BaseDirectory, path);
+        if (Directory.Exists(basePath))
+        {
+            foreach (var file in Directory.EnumerateFiles(basePath, "*.yml", SearchOption.TopDirectoryOnly)
+                .Concat(Directory.EnumerateFiles(basePath, "*.yaml", SearchOption.TopDirectoryOnly)))
+            {
+                TryLoadProfile(file);
+            }
+        }
+        else if (File.Exists(basePath))
+        {
+            TryLoadProfile(basePath);
+        }
+        else
+        {
+            _logger.LogWarning("Station configuration path '{Path}' not found.", basePath);
+        }
+    }
+
+    private void TryLoadProfile(string file)
+    {
+        try
+        {
+            var deserializer = new DeserializerBuilder()
+                .WithNamingConvention(UnderscoredNamingConvention.Instance)
+                .IgnoreUnmatchedProperties()
+                .Build();
+            using var reader = File.OpenText(file);
+            var profile = deserializer.Deserialize<StationProfile>(reader);
+            if (string.IsNullOrWhiteSpace(profile.Name)) profile.Name = Path.GetFileNameWithoutExtension(file);
+            _profiles[profile.Name] = profile;
+            _logger.LogInformation("Loaded station profile '{Name}' from {File}", profile.Name, file);
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Failed to load station profile from {File}", file);
+        }
+    }
+}
diff --git a/services/net/auto-clipper/Config/StationProfile.cs b/services/net/auto-clipper/Config/StationProfile.cs
new file mode 100644
index 0000000000..408475cba5
--- /dev/null
+++ b/services/net/auto-clipper/Config/StationProfile.cs
@@ -0,0 +1,53 @@
+namespace TNO.Services.AutoClipper.Config;
+
+public class StationProfile
+{
+    public string Name { get; set; } = "default";
+    public int SampleRate { get; set; } = 16000;
+    public StationTranscriptionProfile Transcription { get; set; } = new();
+    public StationTextProfile Text { get; set; } = new();
+    public StationHeuristicProfile Heuristics { get; set; } = new();
+}
+
+public class StationTranscriptionProfile
+{
+    public string Provider { get; set; } = "azure_speech";
+    public bool Diarization { get; set; }
+    public int? MaxSpeakers { get; set; }
+    public string? DiarizationMode { get; set; } = "online";
+    public string Language { get; set; } = "en-US";
+    public int SampleRate { get; set; } = 16000;
+}
+
+public class StationTextProfile
+{
+    public double ChunkSizeSeconds { get; set; } = 3.0;
+    public double ChunkOverlapRatio { get; set; } = 0.5;
+    public double HeuristicBoundaryWeight { get; set; } = 0.15;
+    public Dictionary<string, string> KeywordCategories { get; set; } = new();
+    public bool LlmSegmentation { get; set; } = true;
+    public string LlmModel { get; set; } = string.Empty;
+    public float? LlmTemperature { get; set; }
+    public string LlmPrompt { get; set; } = string.Empty;
+    public string? SystemPrompt { get; set; }
+    public bool? LlmDiarization { get; set; } = null;
+    public int? PromptCharacterLimit { get; set; }
+    public int? MaxStories { get; set; }
+}
+
+public class StationHeuristicProfile
+{
+    public Dictionary<string, double> BoundaryWeights { get; set; } = new();
+    public List<string> KeywordPatterns { get; set; } = new();
+    public List<StationHeuristicPattern> PatternEntries { get; set; } = new();
+}
+
+public class StationHeuristicPattern
+{
+    public string Pattern { get; set; } = string.Empty;
+    public double? Weight { get; set; }
+    public string? Category { get; set; }
+    public string? Note { get; set; }
+}
diff --git a/services/net/auto-clipper/Config/Stations/CKNW.yml b/services/net/auto-clipper/Config/Stations/CKNW.yml
new file mode 100644
index 0000000000..105b3bef7e
--- /dev/null
+++ b/services/net/auto-clipper/Config/Stations/CKNW.yml
@@ -0,0 +1,87 @@
+# Station specific configuration for CKNW. Adjust as needed.
+name: CKNW
+sample_rate: 16000
+transcription:
+  provider: azure_speech
+  diarization: false
+  max_speakers: 10
+  language: en-CA
+text:
+  chunk_size_s: 3.0
+  chunk_overlap_ratio: 0.5
+  heuristic_boundary_weight: 0.35
+  keyword_categories:
+    "(?i)traffic": Traffic
+    "(?i)weather": Weather
+    "(?i)sponsor": Ad
+    "(?i)commercial": Ad
+    "(?i)up next": Promo
+    "(?i)coming up": Promo
+  llm_segmentation: true
+  llm_model: gpt-5.1-chat
+  llm_temperature: 1
+  system_prompt: |
+    You are a Broadcast Structure Parser. Your ONLY job is to detect segment transitions.
+    Output MUST be a single, raw JSON object.
+    CRITICAL: Start your response with '{' and end with '}'.
+    DO NOT use markdown, backticks, or "```json" blocks. No introductory or closing text.
+  max_stories: 15
+  llm_prompt: |
+    Identify every point in the transcript where the topic or segment type changes.
+
+    # STRUCTURAL RULES (To Prevent Bundling)
+    1. **The Sign-off Rule**: Phrases like "Global News," "CBC News," or "Reporting live" followed by a name mark the END of a segment. The very next sentence MUST be a new boundary.
+    2. **The Handoff Rule**: When an anchor introduces a reporter (e.g., "As Joshua reports..."), the boundary starts at the ANCHOR'S introduction line.
+    3. **Mandatory Category Split**: News, Traffic, Weather, and Ads MUST be isolated. Never bundle a Traffic report with a News story.
+    4. **Zero Bloating**: Treat every unique headline as a separate clip. If the topic shifts from a shooting to a stabbing, create two distinct boundaries.
+
+    # OUTPUT FORMAT (Raw JSON ONLY)
+    {
+      "boundaries": [
+        {
+          "index": [Sentence Number],
+          "category": "News | Traffic | Weather | Ad | Promo",
+          "title": "[Short Slug]",
+          "summary": "[One Sentence Recap]",
+          "score": 0.95
+        }
+      ]
+    }
+
+    Transcript:
+    {{transcript}}
+heuristics:
+  pattern_entries:
+    # --- Existing Patterns ---
+    - pattern: "(?i)coming up"
+      weight: 0.65
+      category: Promo
+      note: Host tease for the next story
+    - pattern: "(?i)after the break"
+      weight: 0.65
+      category: Promo
+      note: Signals a hard break/transition
+
+    # --- New Anchor-Specific Patterns ---
+    - pattern: "(?i)Ian Wilson"
+      weight: 0.85
+      category: Traffic
+      note: Mandatory Traffic anchor introduction
+    - pattern: "(?i)Mark Madryga"
+      weight: 0.85
+      category: Weather
+      note: Mandatory Weather anchor introduction
+    - pattern: "(?i)John Straight"
+      weight: 0.60
+      category: News
+      note: Primary News anchor reset
+
+    # --- Service Cues ---
+    - pattern: "(?i)traffic update"
+      weight: 0.6
+      category: Traffic
+      note: Recurring traffic block
+    - pattern: "(?i)weather update"
+      weight: 0.55
+      category: Weather
+      note: Weather hits are their own segments
diff --git a/services/net/auto-clipper/Dockerfile b/services/net/auto-clipper/Dockerfile
new file mode 100644
index 0000000000..801705c462
--- /dev/null
+++ b/services/net/auto-clipper/Dockerfile
@@ -0,0 +1,43 @@
+FROM mcr.microsoft.com/dotnet/sdk:9.0 AS build
+
+ENV DOTNET_CLI_HOME=/tmp
+ENV PATH="$PATH:/tmp/.dotnet/tools"
+ENV ASPNETCORE_ENVIRONMENT=Production
+
+# Switch to root for package installs
+USER 0
+
+WORKDIR /src
+COPY services/net/auto-clipper services/net/auto-clipper
+COPY libs/net libs/net
+
+RUN fix_permissions() { while [ $# -gt 0 ] ; do chgrp -R 0 "$1" && chmod -R g=u "$1"; shift; done } && \
+    fix_permissions "/tmp"
+
+WORKDIR /src/services/net/auto-clipper
+RUN dotnet build -c $ASPNETCORE_ENVIRONMENT -o /build
+
+FROM mcr.microsoft.com/dotnet/aspnet:9.0 AS deploy
+
+WORKDIR /app
+COPY --from=build /build .
+
+# This volume is the local storage for uploaded files.
+RUN mkdir /data
+VOLUME /data
+
+# Install the additional OS packages required for audio/video processing.
+RUN apt-get update --fix-missing && apt-get -y upgrade \
+    && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install --no-install-recommends procps curl ffmpeg
+RUN apt-get -y install \
+    libc6-dev libgdiplus libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libgstreamer-plugins-bad1.0-dev \
+    gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly \
+    gstreamer1.0-libav gstreamer1.0-tools gstreamer1.0-alsa gstreamer1.0-gl gstreamer1.0-gtk3 gstreamer1.0-qt5 gstreamer1.0-pulseaudio \
+    gstreamer1.0-x
+
+# Run container by default as user with id 1001 (default)
+USER 1001
+
+ENTRYPOINT ["dotnet", "TNO.Services.AutoClipper.dll"]
diff --git a/services/net/auto-clipper/Exceptions.cs b/services/net/auto-clipper/Exceptions.cs
new file mode 100644
index 0000000000..7dc30e6076
--- /dev/null
+++ b/services/net/auto-clipper/Exceptions.cs
@@ -0,0 +1,42 @@
+using System;
+
+namespace TNO.Services.AutoClipper.Exceptions
+{
+    public class FileMissingException : Exception
+    {
+        public long ContentId { get; }
+        public string Path { get; }
+
+        public FileMissingException(long contentId, string path)
+            : base($"File missing for Content ID: {contentId}. Path: {path}")
+        {
+            ContentId = contentId;
+            Path = path;
+        }
+    }
+
+    public class EmptyTranscriptException : Exception
+    {
+        public long ContentId { get; }
+
+        public EmptyTranscriptException(long contentId)
+            : base($"Content did not generate a transcript. Content ID: {contentId}")
+        {
+            ContentId = contentId;
+        }
+    }
+
+    public class ContentNotFoundException : Exception
+    {
+        public long ContentId { get; }
+
+        public ContentNotFoundException(long contentId)
+            : base($"Content no longer exists. Content ID: {contentId}")
+        {
+            ContentId = contentId;
+        }
+    }
+}
\ No newline at end of file
diff --git a/services/net/auto-clipper/LLM/ClipDefinition.cs b/services/net/auto-clipper/LLM/ClipDefinition.cs
new file mode 100644
index 0000000000..ff997e17f5
--- /dev/null
+++ b/services/net/auto-clipper/LLM/ClipDefinition.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace TNO.Services.AutoClipper.LLM;
+
+/// <summary>
+/// Represents a suggested clip from the LLM response.
+/// </summary>
+public record ClipDefinition(string Title, string Summary, TimeSpan Start, TimeSpan End, string Category = "News")
+{
+    public bool IsValid => End > Start;
+}
diff --git a/services/net/auto-clipper/LLM/ClipSegmentationService.cs b/services/net/auto-clipper/LLM/ClipSegmentationService.cs
new file mode 100644
index 0000000000..87f2ac2dff
--- /dev/null
+++ b/services/net/auto-clipper/LLM/ClipSegmentationService.cs
@@ -0,0 +1,471 @@
+using System.Globalization;
+using System.Net.Http.Json;
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using TNO.Core.Exceptions;
+using TNO.Core.Extensions;
+using TNO.Services.AutoClipper.Azure;
+using TNO.Services.AutoClipper.Config;
+using TNO.Services.AutoClipper.LLM.Models;
+
+namespace TNO.Services.AutoClipper.LLM;
+
+/// <summary>
+/// ClipSegmentationService class, provides a way to send a transcript to Azure OpenAI to identify separate stories within the transcript.
+/// </summary>
+public class ClipSegmentationService : IClipSegmentationService
+{
+    private const string DefaultSystemPrompt = "You are a news segment tool. Analyze timestamped transcripts, choose where new stories begin, and output JSON suitable for ffmpeg clip creation.";
+
+    private const int ParagraphSentenceCount = 4;
+
+    private const string DefaultPrompt = """
+You will receive a transcript formatted as numbered sentences (index. timestamp range :: sentence).
+Identify up to {{max_clips}} places where a new story starts and return ONLY JSON:
+{
+  "boundaries": [
+    {"index": 12, "title": "slug", "summary": "recap", "score": 0.82}
+  ]
+}
+
+Rules:
+- `index` is the numbered sentence (1-based) where the new story begins.
+- `score` ranges from 0-1; higher means stronger confidence.
+- Consider the optional heuristic cues before discarding a boundary.
+- Keep boundaries chronological and avoid duplicates.
+- Do not invent timestamps; rely only on the provided lines.
+
+Heuristic cues (if provided):
+{{heuristic_notes}}
+
+Transcript:
+{{transcript}}
+""";
+
+    private readonly HttpClient _httpClient;
+    private readonly AutoClipperOptions _options;
+    private readonly ILogger<ClipSegmentationService> _logger;
+
+    public ClipSegmentationService(HttpClient httpClient, IOptions<AutoClipperOptions> options, ILogger<ClipSegmentationService> logger)
+    {
+        _httpClient = httpClient;
+        _options = options.Value;
+        _logger = logger;
+    }
+
+    public async Task<IReadOnlyList<ClipDefinition>> GenerateClipsAsync(IReadOnlyList<TimestampedTranscript> transcript, ClipSegmentationSettings? settings, CancellationToken cancellationToken)
+    {
+        if (transcript == null || transcript.Count == 0)
+            return [];
+
+        if (_options.LlmApiUrl == null || string.IsNullOrWhiteSpace(_options.LlmApiKey))
+            throw new InvalidOperationException("LLM configuration is missing the Azure OpenAI endpoint or API key.");
+
+        try
+        {
+            var heuristicHits = BuildHeuristicHits(transcript, settings);
+            var prompt = BuildPrompt(transcript, settings, heuristicHits);
+            var systemPrompt = string.IsNullOrWhiteSpace(settings?.SystemPrompt) ? DefaultSystemPrompt : settings!.SystemPrompt!;
+            var payload = new
+            {
+                model = string.IsNullOrWhiteSpace(settings?.ModelOverride) ? _options.LlmDefaultModel : settings!.ModelOverride!,
+                temperature = settings?.TemperatureOverride,
+                messages = new object[]
+                {
+                    new { role = "system", content = systemPrompt },
+                    new { role = "user", content = prompt }
+                }
+            };
+
+            var requestUri = _options.LlmApiUrl;
+            _logger.LogDebug("Sending LLM segmentation request to {RequestUri} with payload: {Payload}", requestUri, JsonSerializer.Serialize(payload));
+            using var request = new HttpRequestMessage(HttpMethod.Post, requestUri);
+            request.Headers.Add("api-key", _options.LlmApiKey);
+            request.Content = JsonContent.Create(payload);
+
+            using var response = await _httpClient.SendAsync(request, cancellationToken);
+            var body = await response.Content.ReadAsStringAsync(cancellationToken);
+
+            if (response.IsSuccessStatusCode)
+            {
+                var clipDefinitions = ParseResponse(body, transcript, settings, heuristicHits);
+                if (clipDefinitions.Count == 0)
+                {
+                    _logger.LogWarning("LLM segmentation did not return any clips.");
+                    return [];
+                }
+
+                return clipDefinitions;
+            }
+            else
+            {
+                var responseException = new HttpClientRequestException(response);
+                _logger.LogError(responseException, "Failed to segment transcript with LLM. Error: {Details}", body);
+                return [];
+            }
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Failed to segment transcript with LLM. Error: {Details}", ex.GetAllMessages());
+            return [];
+        }
+    }
+
+    private string BuildPrompt(IReadOnlyList<TimestampedTranscript> transcript, ClipSegmentationSettings? overrides, IReadOnlyList<HeuristicHit> heuristicHits)
+    {
+        var template = !string.IsNullOrWhiteSpace(overrides?.PromptOverride)
+            ? overrides!.PromptOverride!
+            : string.IsNullOrWhiteSpace(_options.LlmPrompt) ? DefaultPrompt : _options.LlmPrompt;
+        var includesHeuristicPlaceholder = template.Contains("{{heuristic_notes}}");
+        var limit = ResolvePromptLimit(overrides?.PromptCharacterLimit);
+        var transcriptBody = BuildPromptTranscript(transcript, limit);
+        var heuristicNotes = BuildHeuristicNotes(heuristicHits, transcript);
+
+        var maxClips = overrides?.MaxStories ?? _options.MaxStoriesFromClip;
+        if (maxClips <= 0) maxClips = _options.MaxStoriesFromClip;
+
+        var prompt = template
+            .Replace("{{max_clips}}", maxClips.ToString(CultureInfo.InvariantCulture))
+            .Replace("{{transcript}}", transcriptBody)
+            .Replace("{{heuristic_notes}}", heuristicNotes);
+
+        if (!includesHeuristicPlaceholder && !string.IsNullOrWhiteSpace(heuristicNotes))
+        {
+            prompt += "\n\nHeuristic cues (for reference):\n" + heuristicNotes;
+        }
+
+        return prompt;
+    }
+
+    private int? ResolvePromptLimit(int? overrideLimit)
+    {
+        if (overrideLimit.HasValue)
+            return overrideLimit.Value > 0 ? overrideLimit.Value : null;
+
+        return _options.LlmPromptCharacterLimit > 0 ? _options.LlmPromptCharacterLimit : null;
+    }
+
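+    // The transcript sent to the LLM is one numbered line per sentence, e.g.
+    // (illustrative values):
+    //   12. 00:03:05.120 --> 00:03:09.480 :: Police are investigating a crash on Highway 1.
+    // followed by four-sentence "paragraph" groupings, so the model can cite
+    // boundaries by sentence number.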
+    private static string BuildPromptTranscript(IReadOnlyList<TimestampedTranscript> transcript, int? limit)
+    {
+        var builder = new StringBuilder();
+        builder.AppendLine("Sentences:");
+        for (var i = 0; i < transcript.Count; i++)
+        {
+            var sentence = transcript[i];
+            if (string.IsNullOrWhiteSpace(sentence.Text)) continue;
+            var line = $"{i + 1}. {FormatTimestamp(sentence.Start)} --> {FormatTimestamp(sentence.End)} :: {sentence.Text.Trim()}";
+            if (limit.HasValue && builder.Length + line.Length > limit.Value)
+                break;
+            builder.AppendLine(line);
+        }
+
+        builder.AppendLine();
+        builder.AppendLine("Paragraphs:");
+        var paragraphNumber = 1;
+        var index = 0;
+        while (index < transcript.Count && (!limit.HasValue || builder.Length < limit.Value))
+        {
+            var start = index;
+            var end = Math.Min(index + ParagraphSentenceCount, transcript.Count);
+            var sentences = new List<string>();
+            for (var j = start; j < end; j++)
+            {
+                var sentence = transcript[j];
+                if (string.IsNullOrWhiteSpace(sentence.Text)) continue;
+                sentences.Add(sentence.Text.Trim());
+            }
+
+            if (sentences.Count > 0)
+            {
+                var line = $"Paragraph {paragraphNumber} (sentences {start + 1}-{end}): {string.Join(" / ", sentences)}";
+                if (limit.HasValue && builder.Length + line.Length > limit.Value) break;
+                builder.AppendLine(line);
+                paragraphNumber++;
+            }
+
+            index += ParagraphSentenceCount;
+        }
+
+        return builder.ToString();
+    }
+
+    private static string BuildHeuristicNotes(IReadOnlyList<HeuristicHit>? hits, IReadOnlyList<TimestampedTranscript> transcript)
+    {
+        if (hits == null || hits.Count == 0) return "";
+        var sb = new StringBuilder();
+        foreach (var hit in hits.OrderBy(h => h.Index))
+        {
+            var sentence = transcript[hit.Index];
+            var snippet = string.IsNullOrWhiteSpace(sentence.Text) ? string.Empty : sentence.Text.Trim();
+            var meta = BuildHeuristicMeta(hit);
+            sb.AppendLine($"Sentence {hit.Index + 1} ({FormatTimestamp(sentence.Start)}): '{hit.Pattern}'{meta} -> {snippet}");
+        }
+        return sb.ToString().Trim();
+    }
+
+    private static string BuildHeuristicMeta(HeuristicHit hit)
+    {
+        var parts = new List<string> { $"w={hit.Weight:0.00}" };
+        if (!string.IsNullOrWhiteSpace(hit.Category)) parts.Add($"cat={hit.Category}");
+        if (!string.IsNullOrWhiteSpace(hit.Note)) parts.Add(hit.Note!);
+        return parts.Count == 0 ? string.Empty : $" [{string.Join(", ", parts)}]";
+    }
+
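+    /// <summary>
+    /// Scan the transcript for the station's regex patterns and record each match
+    /// as a weighted boundary hint; per-pattern weights override the base weight.
+    /// </summary>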
+    private IReadOnlyList<HeuristicHit> BuildHeuristicHits(IReadOnlyList<TimestampedTranscript> transcript, ClipSegmentationSettings? settings)
+    {
+        if (transcript == null || transcript.Count == 0) return Array.Empty<HeuristicHit>();
+        var patternEntries = settings?.HeuristicPatternEntries;
+        var legacyPatterns = settings?.KeywordPatterns;
+        var baseWeight = settings?.HeuristicBoundaryWeight ?? 0;
+        var hasEntryOverrides = patternEntries != null && patternEntries.Count > 0;
+        if (!hasEntryOverrides && (legacyPatterns == null || legacyPatterns.Count == 0)) return Array.Empty<HeuristicHit>();
+        if (!hasEntryOverrides && baseWeight <= 0) return Array.Empty<HeuristicHit>();
+
+        var hits = new List<HeuristicHit>();
+        var categoryLookup = settings?.KeywordCategories ?? new Dictionary<string, string>();
+
+        void AddMatches(string pattern, double weight, string? category, string? note)
+        {
+            if (string.IsNullOrWhiteSpace(pattern) || weight <= 0) return;
+            Regex regex;
+            try
+            {
+                regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
+            }
+            catch (Exception ex)
+            {
+                _logger.LogWarning(ex, "Invalid heuristic pattern: {Pattern}", pattern);
+                return;
+            }
+
+            for (var i = 0; i < transcript.Count; i++)
+            {
+                var textValue = transcript[i].Text;
+                if (string.IsNullOrWhiteSpace(textValue)) continue;
+                if (regex.IsMatch(textValue))
+                    hits.Add(new HeuristicHit(i, pattern, weight, category, note));
+            }
+        }
+
+        if (hasEntryOverrides)
+        {
+            foreach (var entry in patternEntries!)
+            {
+                if (entry == null || string.IsNullOrWhiteSpace(entry.Pattern)) continue;
+                var weight = entry.Weight ?? baseWeight;
+                if (weight <= 0) continue;
+                var category = !string.IsNullOrWhiteSpace(entry.Category)
+                    ? entry.Category
+                    : (categoryLookup.TryGetValue(entry.Pattern, out var mapped) ? mapped : null);
+                AddMatches(entry.Pattern, weight, category, entry.Note);
+            }
+        }
+        else if (legacyPatterns != null)
+        {
+            foreach (var pattern in legacyPatterns)
+            {
+                if (string.IsNullOrWhiteSpace(pattern)) continue;
+                var category = categoryLookup.TryGetValue(pattern, out var mappedCategory) ? mappedCategory : null;
+                AddMatches(pattern, baseWeight, category, null);
+            }
+        }
+
+        return hits;
+    }
+
+    private IReadOnlyList<ClipDefinition> ParseResponse(string? body, IReadOnlyList<TimestampedTranscript> transcript, ClipSegmentationSettings? settings, IReadOnlyList<HeuristicHit> heuristicHits)
+    {
+        if (string.IsNullOrWhiteSpace(body)) return [];
+
+        try
+        {
+            body = StripCodeFence(body);
+            var doc = JsonSerializer.Deserialize<LLMResponse>(body);
+            if (doc == null || doc.Choices == null || doc.Choices.Count == 0)
+            {
+                _logger.LogWarning("LLM response deserialization resulted in null or empty choices.");
+                return [];
+            }
+
+            var candidates = new List<BoundaryCandidate>();
+            foreach (var content in doc.Choices?.Select(c => c.Message?.Content).Where(c => c != null) ?? [])
+            {
+                if (content == null) continue;
+
+                var boundaries = JsonSerializer.Deserialize<TranscriptBoundaries>(content!);
+                if (boundaries == null || boundaries.Boundaries == null) continue;
+                foreach (var boundary in boundaries.Boundaries)
+                {
+                    var rawIndex = boundary.Index;
+                    if (rawIndex <= 0)
+                        continue;
+                    var zeroIndex = Math.Clamp(rawIndex - 1, 0, transcript.Count - 1);
+                    var title = string.IsNullOrWhiteSpace(boundary.Title) ? "Clip" : boundary.Title;
+                    var summary = string.IsNullOrWhiteSpace(boundary.Summary) ? string.Empty : boundary.Summary;
+                    var category = string.IsNullOrWhiteSpace(boundary.Category) ? null : boundary.Category;
+                    var score = Math.Clamp(boundary.Score, 0, 1);
+                    candidates.Add(new BoundaryCandidate(zeroIndex, title, summary, score, false, category));
+                }
+            }
+
+            var threshold = Math.Clamp(_options.LlmBoundaryScoreThreshold, 0, 1);
+            return CreateClipDefinitions(transcript, candidates, threshold, heuristicHits);
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Unable to parse LLM segmentation response. Raw body: {body}", body);
+            return [];
+        }
+    }
+
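+    // Worked example (illustrative): boundaries at sentences 1, 12, and 30 of a
+    // 45-sentence transcript yield three clips: [s1.Start, s12.Start),
+    // [s12.Start, s30.Start), and [s30.Start, s45.End]. A boundary at sentence 1
+    // is inserted if missing, and sub-threshold scores are filtered out first.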
+    private IReadOnlyList<ClipDefinition> CreateClipDefinitions(IReadOnlyList<TimestampedTranscript> transcript, List<BoundaryCandidate> candidates, double threshold, IReadOnlyList<HeuristicHit> heuristicHits)
+    {
+        if (transcript == null || transcript.Count == 0)
+            return [];
+
+        var map = new Dictionary<int, BoundaryCandidate>();
+        foreach (var candidate in candidates)
+        {
+            var index = Math.Clamp(candidate.Index, 0, transcript.Count - 1);
+            if (!map.TryGetValue(index, out var existing) || candidate.Score > existing.Score)
+                map[index] = candidate with { Index = index };
+        }
+
+        if (heuristicHits != null && heuristicHits.Count > 0)
+        {
+            foreach (var hit in heuristicHits)
+            {
+                var index = Math.Clamp(hit.Index, 0, transcript.Count - 1);
+                var heuristicCandidate = new BoundaryCandidate(index, $"Heuristic boundary ({hit.Pattern})", string.Empty, hit.Weight, true, hit.Category);
+                if (!map.TryGetValue(index, out var existing) || heuristicCandidate.Score > existing.Score)
+                    map[index] = heuristicCandidate;
+            }
+        }
+
+        var ordered = map.Values.OrderBy(c => c.Index).ToList();
+        if (ordered.Count == 0)
+            ordered.Add(new BoundaryCandidate(0, "Full Program", "AutoClipper fallback clip", 1));
+
+        if (ordered[0].Index != 0)
+            ordered.Insert(0, ordered[0] with { Index = 0, Score = 1, IsHeuristic = false });
+
+        var filtered = new List<BoundaryCandidate>();
+        foreach (var candidate in ordered)
+        {
+            if (candidate.Index == 0 || candidate.Score >= threshold)
+                filtered.Add(candidate);
+        }
+        if (filtered.Count == 0)
+            filtered.Add(new BoundaryCandidate(0, "Full Program", "AutoClipper fallback clip", 1));
+
+        var list = new List<ClipDefinition>();
+        for (var i = 0; i < filtered.Count; i++)
+        {
+            var boundary = filtered[i];
+            var start = transcript[boundary.Index].Start;
+            var endIndex = i + 1 < filtered.Count ? filtered[i + 1].Index : transcript.Count - 1;
+            var end = i + 1 < filtered.Count ? transcript[filtered[i + 1].Index].Start : transcript[^1].End;
+            if (end <= start) continue;
+            var title = string.IsNullOrWhiteSpace(boundary.Title) ? $"Clip {i + 1}" : boundary.Title;
+            var summary = string.IsNullOrWhiteSpace(boundary.Summary) ? string.Empty : boundary.Summary;
+            var category = DetermineCategory(boundary, heuristicHits, boundary.Index, endIndex) ?? "News";
+            list.Add(new ClipDefinition(title, summary, start, end, category));
+            _logger.LogInformation("Boundary {BoundaryIndex}: {Title} ({Start}-{End}) Score={Score:0.00} Heuristic={IsHeuristic} Category={Category}", boundary.Index + 1, title, start, end, boundary.Score, boundary.IsHeuristic, category);
+        }
+
+        return FilterOverlaps(list);
+    }
+
+    private static string? DetermineCategory(BoundaryCandidate boundary, IReadOnlyList<HeuristicHit>? hits, int startIndex, int endIndex)
+    {
+        if (!string.IsNullOrWhiteSpace(boundary.Category)) return boundary.Category;
+        if (hits == null || hits.Count == 0) return null;
+        var best = hits
+            .Where(h => h.Index >= startIndex && h.Index <= endIndex)
+            .OrderByDescending(h => h.Weight)
+            .ThenBy(h => h.Index)
+            .FirstOrDefault(h => !string.IsNullOrWhiteSpace(h.Category));
+        return best?.Category;
+    }
+
+    private sealed record BoundaryCandidate(int Index, string Title, string Summary, double Score, bool IsHeuristic = false, string? Category = null);
+
+    private sealed record HeuristicHit(int Index, string Pattern, double Weight, string? Category, string? Note);
+
+    private static string StripCodeFence(string body)
+    {
+        if (string.IsNullOrWhiteSpace(body)) return body ?? string.Empty;
+        var trimmed = body.Trim();
+        if (trimmed.StartsWith("```", StringComparison.Ordinal))
+        {
+            var newline = trimmed.IndexOf('\n');
+            var closingFence = trimmed.LastIndexOf("```", StringComparison.Ordinal);
+            if (newline >= 0 && closingFence > newline)
+            {
+                trimmed = trimmed[(newline + 1)..closingFence].Trim();
+            }
+        }
+        return trimmed;
+    }
+
+    private static TimeSpan ReadTime(JsonElement element, string property)
+    {
+        if (!element.TryGetProperty(property, out var node)) return TimeSpan.Zero;
+        if (node.ValueKind == JsonValueKind.Number && node.TryGetDouble(out var seconds)) return TimeSpan.FromSeconds(Math.Max(0, seconds));
+        if (node.ValueKind == JsonValueKind.String)
+        {
+            var value = node.GetString();
+            if (double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var fromString))
+                return TimeSpan.FromSeconds(Math.Max(0, fromString));
+            if (TimeSpan.TryParse(value, CultureInfo.InvariantCulture, out var ts)) return ts;
+        }
+        return TimeSpan.Zero;
+    }
+
+    private static string FormatTimestamp(TimeSpan value)
+    {
+        return string.Format(CultureInfo.InvariantCulture, "{0:00}:{1:00}:{2:00}.{3:000}", (int)value.TotalHours, value.Minutes, value.Seconds, value.Milliseconds);
+    }
+
+    private static (TimeSpan Start, TimeSpan End)? SnapToTranscriptBounds(IReadOnlyList<TimestampedTranscript> transcript, TimeSpan start, TimeSpan end)
+    {
+        if (transcript == null || transcript.Count == 0) return null;
+        var first = transcript.FirstOrDefault(s => s.End > start);
+        var last = transcript.LastOrDefault(s => s.Start < end);
+        if (first == null || last == null) return null;
+        if (last.End > transcript[^1].End) last = transcript[^1];
+        if (last.End <= first.Start) return null;
+        return (first.Start, last.End);
+    }
+
+    private static IReadOnlyList<ClipDefinition> FilterOverlaps(IReadOnlyList<ClipDefinition> clips)
+    {
+        if (clips == null || clips.Count == 0) return Array.Empty<ClipDefinition>();
+        var ordered = clips.OrderBy(c => c.Start).ToArray();
+        var result = new List<ClipDefinition>(ordered.Length);
+        var lastEnd = TimeSpan.Zero;
+        foreach (var clip in ordered)
+        {
+            if (clip.Start < lastEnd)
+                continue;
+            result.Add(clip);
+            lastEnd = clip.End;
+        }
+        return result;
+    }
+}
diff --git a/services/net/auto-clipper/LLM/ClipSegmentationSettings.cs b/services/net/auto-clipper/LLM/ClipSegmentationSettings.cs
new file mode 100644
index 0000000000..828e8a0d01
--- /dev/null
+++ b/services/net/auto-clipper/LLM/ClipSegmentationSettings.cs
@@ -0,0 +1,24 @@
+namespace TNO.Services.AutoClipper.LLM;
+
+public class ClipSegmentationSettings
+{
+    public string? PromptOverride { get; set; }
+    public string? ModelOverride { get; set; }
+    public float? TemperatureOverride { get; set; }
+    public string? SystemPrompt { get; set; }
+    public int? PromptCharacterLimit { get; set; }
+    public int? MaxStories { get; set; }
+    public IReadOnlyList<string>? KeywordPatterns { get; set; }
+    public double? HeuristicBoundaryWeight { get; set; }
+    public IReadOnlyDictionary<string, string>? KeywordCategories { get; set; }
+    public IReadOnlyList<HeuristicPatternSetting>? HeuristicPatternEntries { get; set; }
+}
+
+public class HeuristicPatternSetting
+{
+    public string Pattern { get; set; } = string.Empty;
+    public double? Weight { get; set; }
+    public string? Category { get; set; }
+    public string? Note { get; set; }
+}
diff --git a/services/net/auto-clipper/LLM/IClipSegmentationService.cs b/services/net/auto-clipper/LLM/IClipSegmentationService.cs
new file mode 100644
index 0000000000..52743ee6a0
--- /dev/null
+++ b/services/net/auto-clipper/LLM/IClipSegmentationService.cs
@@ -0,0 +1,8 @@
+using TNO.Services.AutoClipper.Azure;
+
+namespace TNO.Services.AutoClipper.LLM;
+
+public interface IClipSegmentationService
+{
+    Task<IReadOnlyList<ClipDefinition>> GenerateClipsAsync(IReadOnlyList<TimestampedTranscript> transcript, ClipSegmentationSettings? settings, CancellationToken cancellationToken);
+}
diff --git a/services/net/auto-clipper/LLM/Models/LLMChoice.cs b/services/net/auto-clipper/LLM/Models/LLMChoice.cs
new file mode 100644
index 0000000000..d0752d8c10
--- /dev/null
+++ b/services/net/auto-clipper/LLM/Models/LLMChoice.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace TNO.Services.AutoClipper.LLM.Models;
+
+public class LLMChoice
+{
+    [JsonPropertyName("index")]
+    public int Index { get; set; }
+
+    [JsonPropertyName("message")]
+    public LLMMessage? Message { get; set; }
+
+    [JsonPropertyName("finish_reason")]
+    public string? FinishReason { get; set; }
+}
diff --git a/services/net/auto-clipper/LLM/Models/LLMMessage.cs b/services/net/auto-clipper/LLM/Models/LLMMessage.cs
new file mode 100644
index 0000000000..62d476f9bf
--- /dev/null
+++ b/services/net/auto-clipper/LLM/Models/LLMMessage.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace TNO.Services.AutoClipper.LLM.Models;
+
+public class LLMMessage
+{
+    [JsonPropertyName("role")]
+    public string? Role { get; set; }
+
+    [JsonPropertyName("content")]
+    public string? Content { get; set; }
+}
diff --git a/services/net/auto-clipper/LLM/Models/LLMResponse.cs b/services/net/auto-clipper/LLM/Models/LLMResponse.cs
new file mode 100644
index 0000000000..c5ad47aa78
--- /dev/null
+++ b/services/net/auto-clipper/LLM/Models/LLMResponse.cs
@@ -0,0 +1,27 @@
+using System.Text.Json.Serialization;
+
+namespace TNO.Services.AutoClipper.LLM.Models;
+
+public class LLMResponse
+{
+    [JsonPropertyName("id")]
+    public string? Id { get; set; }
+
+    [JsonPropertyName("object")]
+    public string? Object { get; set; }
+
+    [JsonPropertyName("created")]
+    public long Created { get; set; }
+
+    [JsonPropertyName("model")]
+    public string? Model { get; set; }
+
+    [JsonPropertyName("choices")]
+    public List<LLMChoice>? Choices { get; set; }
+
+    [JsonPropertyName("usage")]
+    public LLMUsage? Usage { get; set; }
+
+    [JsonPropertyName("status")]
+    public string?
Status { get; set; } +} diff --git a/services/net/auto-clipper/LLM/Models/LLMUsage.cs b/services/net/auto-clipper/LLM/Models/LLMUsage.cs new file mode 100644 index 0000000000..a0e090c040 --- /dev/null +++ b/services/net/auto-clipper/LLM/Models/LLMUsage.cs @@ -0,0 +1,15 @@ +using System.Text.Json.Serialization; + +namespace TNO.Services.AutoClipper.LLM.Models; + +public class LLMUsage +{ + [JsonPropertyName("prompt_tokens")] + public int PromptTokens { get; set; } + + [JsonPropertyName("completion_tokens")] + public int CompletionTokens { get; set; } + + [JsonPropertyName("total_tokens")] + public int TotalTokens { get; set; } +} diff --git a/services/net/auto-clipper/LLM/Models/TranscriptBoundaries.cs b/services/net/auto-clipper/LLM/Models/TranscriptBoundaries.cs new file mode 100644 index 0000000000..ad0f606163 --- /dev/null +++ b/services/net/auto-clipper/LLM/Models/TranscriptBoundaries.cs @@ -0,0 +1,9 @@ +using System.Text.Json.Serialization; + +namespace TNO.Services.AutoClipper.LLM.Models; + +public class TranscriptBoundaries +{ + [JsonPropertyName("boundaries")] + public TranscriptBoundary[] Boundaries { get; set; } = []; +} diff --git a/services/net/auto-clipper/LLM/Models/TranscriptBoundary.cs b/services/net/auto-clipper/LLM/Models/TranscriptBoundary.cs new file mode 100644 index 0000000000..015c2923fc --- /dev/null +++ b/services/net/auto-clipper/LLM/Models/TranscriptBoundary.cs @@ -0,0 +1,21 @@ +using System.Text.Json.Serialization; + +namespace TNO.Services.AutoClipper.LLM.Models; + +public class TranscriptBoundary +{ + [JsonPropertyName("index")] + public int Index { get; set; } + + [JsonPropertyName("title")] + public string Title { get; set; } = ""; + + [JsonPropertyName("summary")] + public string Summary { get; set; } = ""; + + [JsonPropertyName("category")] + public string? 
Category { get; set; } = "";
+
+    [JsonPropertyName("score")]
+    public float Score { get; set; }
+}
diff --git a/services/net/auto-clipper/Pipeline/ClipProcessingContext.cs b/services/net/auto-clipper/Pipeline/ClipProcessingContext.cs
new file mode 100644
index 0000000000..563c99b81c
--- /dev/null
+++ b/services/net/auto-clipper/Pipeline/ClipProcessingContext.cs
@@ -0,0 +1,6 @@
+using TNO.Kafka.Models;
+using TNO.Services.AutoClipper.Config;
+
+namespace TNO.Services.AutoClipper.Pipeline;
+
+public record ClipProcessingContext(string SourcePath, StationProfile StationProfile, ClipRequestModel Request, int TargetSampleRate);
diff --git a/services/net/auto-clipper/Pipeline/ClipProcessingPipeline.cs b/services/net/auto-clipper/Pipeline/ClipProcessingPipeline.cs
new file mode 100644
index 0000000000..0d098b51cb
--- /dev/null
+++ b/services/net/auto-clipper/Pipeline/ClipProcessingPipeline.cs
@@ -0,0 +1,90 @@
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using TNO.Services.AutoClipper.Audio;
+using TNO.Services.AutoClipper.Azure;
+using TNO.Services.AutoClipper.Config;
+using TNO.Services.AutoClipper.LLM;
+
+namespace TNO.Services.AutoClipper.Pipeline;
+
+public class ClipProcessingPipeline
+{
+    private readonly IAudioNormalizer _audioNormalizer;
+    private readonly IAzureSpeechTranscriptionService _speechTranscriber;
+    private readonly IClipSegmentationService _clipSegmentation;
+    private readonly AutoClipperOptions _options;
+    private readonly ILogger<ClipProcessingPipeline> _logger;
+
+    public ClipProcessingPipeline(
+        IAudioNormalizer audioNormalizer,
+        IAzureSpeechTranscriptionService speechTranscriber,
+        IClipSegmentationService clipSegmentation,
+        IOptions<AutoClipperOptions> options,
+        ILogger<ClipProcessingPipeline> logger)
+    {
+        _audioNormalizer = audioNormalizer;
+        _speechTranscriber = speechTranscriber;
+        _clipSegmentation = clipSegmentation;
+        _options = options.Value;
+        _logger = logger;
+    }
+
+    public async Task<ClipProcessingResult> ExecuteAsync(ClipProcessingContext context, CancellationToken cancellationToken)
+    {
+        var normalizedPath = await _audioNormalizer.NormalizeAsync(context.SourcePath, context.TargetSampleRate, cancellationToken);
+        var language = !string.IsNullOrWhiteSpace(context.Request.Language)
+            ? context.Request.Language!
+            : !string.IsNullOrWhiteSpace(context.StationProfile.Transcription.Language)
+                ? context.StationProfile.Transcription.Language
+                : _options.DefaultTranscriptLanguage;
+        var transcriptionRequest = new SpeechTranscriptionRequest
+        {
+            Language = language,
+            EnableSpeakerDiarization = context.StationProfile.Transcription.Diarization,
+            SpeakerCount = context.StationProfile.Transcription.MaxSpeakers,
+            DiarizationMode = context.StationProfile.Transcription.DiarizationMode
+        };
+
+        var segments = await _speechTranscriber.TranscribeAsync(normalizedPath, transcriptionRequest, cancellationToken);
+        var segmentationSettings = BuildSegmentationSettings(context.StationProfile);
+        var clipDefinitions = await _clipSegmentation.GenerateClipsAsync(segments, segmentationSettings, cancellationToken);
+
+        return new ClipProcessingResult(normalizedPath, language, segments, clipDefinitions, segmentationSettings);
+    }
+
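+    /// <summary>
+    /// Map the station profile's text settings onto per-request segmentation
+    /// overrides; blank profile values fall back to the service defaults.
+    /// </summary>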
+    private static ClipSegmentationSettings BuildSegmentationSettings(StationProfile profile)
+    {
+        return new ClipSegmentationSettings
+        {
+            PromptOverride = string.IsNullOrWhiteSpace(profile.Text.LlmPrompt) ? null : profile.Text.LlmPrompt,
+            ModelOverride = string.IsNullOrWhiteSpace(profile.Text.LlmModel) ? null : profile.Text.LlmModel,
+            TemperatureOverride = profile.Text.LlmTemperature,
+            SystemPrompt = string.IsNullOrWhiteSpace(profile.Text.SystemPrompt) ? null : profile.Text.SystemPrompt,
+            PromptCharacterLimit = profile.Text.PromptCharacterLimit,
+            MaxStories = profile.Text.MaxStories,
+            KeywordPatterns = profile.Heuristics.KeywordPatterns?.ToArray(),
+            HeuristicPatternEntries = profile.Heuristics.PatternEntries?
+                .Where(p => p != null && !string.IsNullOrWhiteSpace(p.Pattern))
+                .Select(p => new HeuristicPatternSetting
+                {
+                    Pattern = p.Pattern!,
+                    Weight = p.Weight,
+                    Category = string.IsNullOrWhiteSpace(p.Category) ? null : p.Category,
+                    Note = p.Note
+                })
+                .ToArray(),
+            HeuristicBoundaryWeight = profile.Text.HeuristicBoundaryWeight,
+            KeywordCategories = profile.Text.KeywordCategories?.ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
+        };
+    }
+}
diff --git a/services/net/auto-clipper/Pipeline/ClipProcessingResult.cs b/services/net/auto-clipper/Pipeline/ClipProcessingResult.cs
new file mode 100644
index 0000000000..dd2362f2c5
--- /dev/null
+++ b/services/net/auto-clipper/Pipeline/ClipProcessingResult.cs
@@ -0,0 +1,11 @@
+using TNO.Services.AutoClipper.Azure;
+using TNO.Services.AutoClipper.LLM;
+
+namespace TNO.Services.AutoClipper.Pipeline;
+
+public record ClipProcessingResult(
+    string NormalizedAudioPath,
+    string Language,
+    IReadOnlyList<TimestampedTranscript> Segments,
+    IReadOnlyList<ClipDefinition> ClipDefinitions,
+    ClipSegmentationSettings SegmentationSettings);
diff --git a/services/net/auto-clipper/Program.cs b/services/net/auto-clipper/Program.cs
new file mode 100644
index 0000000000..6179264ca1
--- /dev/null
+++ b/services/net/auto-clipper/Program.cs
@@ -0,0 +1,19 @@
+namespace TNO.Services.AutoClipper;
+
+/// <summary>
+/// Program static class, runs program.
+/// </summary>
+public static class Program
+{
+    /// <summary>
+    /// Create an instance of the AutoClipperService and run it.
+    /// </summary>
+    /// <param name="args"></param>
+    /// <returns></returns>
+    public static Task Main(string[] args)
+    {
+        // Run the AutoClipper service console program.
+        var program = new AutoClipperService(args);
+        return program.RunAsync();
+    }
+}
diff --git a/services/net/auto-clipper/README.md b/services/net/auto-clipper/README.md
new file mode 100644
index 0000000000..171d2ad5fb
--- /dev/null
+++ b/services/net/auto-clipper/README.md
@@ -0,0 +1,27 @@
+# AutoClipper Service
+
+The AutoClipper service consumes clip requests from Kafka, normalizes audio, transcribes it with Azure Speech, and
+segments the transcript into clips using a boundary-aware LLM workflow boosted by station heuristics. Key concepts:
+
+- **Station profiles** (Config/Stations/\*.yml) define language, sample rate, heuristic keywords, custom prompts, and
+  category mappings for weather/traffic/ads.
+- **Pipeline** (`ClipProcessingPipeline`) normalizes audio, transcribes via `AzureSpeechTranscriptionService`, and feeds
+  transcripts plus station config into `ClipSegmentationService`.
+- **Segmentation** uses Azure OpenAI to score story boundaries, merges in regex-based heuristics, snaps clips to transcript
+  sentences, and tags each clip with a category before `AutoClipperManager` creates content and uploads the media.
+
+## Development
+
+1. Update station YAMLs under Config/Stations (copy CKNW.yml as a starting point).
+2. Run `dotnet build services/net/auto-clipper/TNO.Services.AutoClipper.csproj` to verify changes.
+3. Use the harness (see tools/auto-clipper-harness/README.md) to manually validate segmentation on sample audio, as in the example below.
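+
+A typical harness invocation looks like this (the station code and file paths here are
+placeholders; see the harness usage message for the argument order):
+
+```sh
+AUTOCLIP_HARNESS_STATION=CKNW \
+dotnet run --project tools/auto-clipper-harness -- input/sample.mp3 en-CA output/
+```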
+
+## Configuration
+
+Important Service\_\_ env vars:
+
+- Service\_\_AzureSpeechKey / Service\_\_AzureSpeechRegion
+- Service\_\_AzureSpeechStorageConnectionString / Service\_\_AzureSpeechStorageContainer (batch upload destination for Azure Speech).
+- Service\_\_AzureSpeechBatchEndpoint, Service\_\_AzureSpeechBatchApiVersion, Service\_\_AzureSpeechBatchPollingIntervalSeconds, Service\_\_AzureSpeechBatchTimeoutMinutes, Service\_\_AzureSpeechStorageSasExpiryMinutes (optional batch tuning).
+- Service\_\_LlmApiUrl, Service\_\_LlmApiKey, Service\_\_LlmDeployment, Service\_\_LlmApiVersion
+- Service\_\_StationConfigPath (optional override for station YAML directory)
diff --git a/services/net/auto-clipper/TNO.Services.AutoClipper.csproj b/services/net/auto-clipper/TNO.Services.AutoClipper.csproj
new file mode 100644
index 0000000000..16a150c259
--- /dev/null
+++ b/services/net/auto-clipper/TNO.Services.AutoClipper.csproj
@@ -0,0 +1,43 @@ + + + Exe + net9.0 + enable + enable + TNO.Services.AutoClipper + 1.0.0.0 + 1.0.0.0 + + + + + + + + + + + + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + +
diff --git a/services/net/auto-clipper/appsettings.Development.json b/services/net/auto-clipper/appsettings.Development.json
new file mode 100644
index 0000000000..7ea658fedf
--- /dev/null
+++ b/services/net/auto-clipper/appsettings.Development.json
@@ -0,0 +1,41 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Warning",
+      "Microsoft": "Warning",
+      "TNO": "Debug"
+    }
+  },
+  "Service": {
+    "MaxFailLimit": 5,
+    "ApiUrl": "http://host.docker.internal:40010/api"
+  },
+  "CHES": {
+    "AuthUrl": "https://dev.loginproxy.gov.bc.ca/auth/realms/comsvcauth/protocol/openid-connect/token",
+    "HostUri": "https://ches-dev.api.gov.bc.ca/api/v1",
+    "From": "Media Monitoring Insights ",
+    "EmailEnabled": true,
+    "EmailAuthorized": false
+  },
+  "Kafka": {
+    "Consumer": {
+      "BootstrapServers": "host.docker.internal:40102",
+      "GroupId": "AutoClipper",
+      "MaxThreads": 2
+    },
+    "Producer": {
+      "BootstrapServers": "host.docker.internal:40102",
+      "ClientId": "AutoClipper"
+    }
+  },
+  "Auth": {
+    "Keycloak": {
+      "Authority": "https://dev.loginproxy.gov.bc.ca/auth",
+      "Audience": "mmi-service-account",
+      "Secret": "{DO NOT STORE SECRET HERE}"
+    },
+    "OIDC": {
+      "Token": "/realms/mmi/protocol/openid-connect/token"
+    }
+  }
+}
diff --git a/services/net/auto-clipper/appsettings.Staging.json b/services/net/auto-clipper/appsettings.Staging.json
new file mode 100644
index 0000000000..a9f74118fc
--- /dev/null
+++ b/services/net/auto-clipper/appsettings.Staging.json
@@ -0,0 +1,41 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Warning",
+      "Microsoft": "Error",
+      "TNO": "Information"
+    }
+  },
+  "Service": {
+    "MaxFailLimit": 5,
+    "ApiUrl": "http://api:8080"
+  },
+  "CHES": {
+    "AuthUrl": "https://test.loginproxy.gov.bc.ca/auth/realms/comsvcauth/protocol/openid-connect/token",
+    "HostUri": "https://ches-test.api.gov.bc.ca/api/v1",
+    "From": "Media Monitoring Insights ",
+    "EmailEnabled": true,
+    "EmailAuthorized": false
+  },
+  "Kafka": {
+    "Consumer": {
+      "BootstrapServers": "kafka-broker-0.kafka-headless:9092,kafka-broker-1.kafka-headless:9092,kafka-broker-2.kafka-headless:9092",
+      "GroupId": "AutoClipper",
+      "MaxThreads": 2
+    },
+    "Producer": {
+      "BootstrapServers": "kafka-broker-0.kafka-headless:9092,kafka-broker-1.kafka-headless:9092,kafka-broker-2.kafka-headless:9092",
+      "ClientId": "AutoClipper"
+    }
+  },
+  "Auth": {
+    "Keycloak": {
+      "Authority": "https://test.loginproxy.gov.bc.ca/auth",
+      "Audience":
"mmi-service-account", + "Secret": "{DO NOT STORE SECRET HERE}" + }, + "OIDC": { + "Token": "/realms/mmi/protocol/openid-connect/token" + } + } +} diff --git a/services/net/auto-clipper/appsettings.json b/services/net/auto-clipper/appsettings.json new file mode 100644 index 0000000000..4bf3fbc1ed --- /dev/null +++ b/services/net/auto-clipper/appsettings.json @@ -0,0 +1,110 @@ +{ + "BaseUrl": "/", + "Logging": { + "Console": { + "DisableColors": true + }, + "LogLevel": { + "Default": "Warning", + "Microsoft": "Error", + "TNO": "Information" + } + }, + "Serilog": { + "Using": ["Serilog.Sinks.Console"], + "MinimumLevel": { + "Default": "Information", + "Override": { + "Microsoft": "Error", + "System.Net.Http": "Warning", + "TNO": "Debug" + } + }, + "WriteTo": [ + { + "Name": "Console", + "Args": { + "outputTemplate": "[{Timestamp:HH:mm:ss} level={CustomLevel}] {Message:lj}{NewLine}{Exception}" + } + } + ], + "Enrich": ["FromLogContext"] + }, + "AllowedHosts": "*", + "Service": { + "MaxFailLimit": 5, + "ApiUrl": "http://api:8080", + "TimeZone": "Pacific Standard Time", + "Topics": "request-clips", + "VolumePath": "/data", + "MaxStoriesFromClip": 5, + + "ApplyTags": ["AUTOCLIP"], + "SendEmailOnFailure": true, + "NoticeEmailTo": "", + "AzureSpeechKey": "", + "AzureSpeechRegion": "westus", + "AzureSpeechBatchEndpoint": "", + "AzureSpeechBatchApiVersion": "v3.2", + "AzureSpeechBatchPollingIntervalSeconds": 10, + "AzureSpeechBatchTimeoutMinutes": 45, + "AzureSpeechStorageConnectionString": "", + "AzureSpeechStorageContainer": "", + "AzureSpeechStorageSasExpiryMinutes": 180, + "DefaultTranscriptLanguage": "en-US", + + "LlmApiUrl": "https://mmiopenai.cognitiveservices.azure.com/", + "LlmApiKey": "", + "LlmDefaultModel": "", + "LlmDeployment": "", + "LlmApiVersion": "", + "LlmPrompt": "", + "LlmPromptCharacterLimit": 0, + "StationConfigPath": "Config/Stations" + }, + "CHES": { + "AuthUrl": "https://loginproxy.gov.bc.ca/auth/realms/comsvcauth/protocol/openid-connect/token", + "HostUri": "https://ches.api.gov.bc.ca/api/v1", + "From": "Media Monitoring Insights ", + "EmailEnabled": true, + "EmailAuthorized": false + }, + "Auth": { + "Keycloak": { + "Authority": "https://loginproxy.gov.bc.ca/auth", + "Audience": "mmi-service-account", + "Secret": "{DO NOT STORE SECRET HERE}" + }, + "OIDC": { + "Token": "/realms/mmi/protocol/openid-connect/token" + } + }, + "Serialization": { + "Json": { + "PropertyNamingPolicy": "CamelCase", + "PropertyNameCaseInsensitive": true, + "DefaultIgnoreCondition": "WhenWritingNull", + "WriteIndented": true + } + }, + "Kafka": { + "Consumer": { + "GroupId": "AutoClipper", + "BootstrapServers": "kafka-broker-0.kafka-headless:9092,kafka-broker-1.kafka-headless:9092,kafka-broker-2.kafka-headless:9092", + "AutoOffsetReset": "Earliest", + "EnableAutoCommit": false, + "MaxThreads": 2, + "MaxPollIntervalMs": 600000 + }, + "Producer": { + "ClientId": "AutoClipper", + "BootstrapServers": "kafka-broker-0.kafka-headless:9092,kafka-broker-1.kafka-headless:9092,kafka-broker-2.kafka-headless:9092", + "Acks": "All", + "EnableIdempotence": true, + "MaxInFlight": 5, + "MessageSendMaxRetries": 10000000, + "BatchSize": 16384, + "LingerMs": 1 + } + } +} diff --git a/services/net/notification/TNO.Services.Notification.csproj b/services/net/notification/TNO.Services.Notification.csproj index 6eadc5845c..696936b742 100644 --- a/services/net/notification/TNO.Services.Notification.csproj +++ b/services/net/notification/TNO.Services.Notification.csproj @@ -20,6 +20,13 @@ + + + + + + + diff --git 
a/services/net/reporting/TNO.Services.Reporting.csproj b/services/net/reporting/TNO.Services.Reporting.csproj
index cce08bea98..2cea01cec9 100644
--- a/services/net/reporting/TNO.Services.Reporting.csproj
+++ b/services/net/reporting/TNO.Services.Reporting.csproj
@@ -20,6 +20,13 @@ + + + + + + +
diff --git a/tools/auto-clipper-harness/.env.sample b/tools/auto-clipper-harness/.env.sample
new file mode 100644
index 0000000000..bcc345cbb7
--- /dev/null
+++ b/tools/auto-clipper-harness/.env.sample
@@ -0,0 +1,16 @@
+# TEMP HARNESS env file. Delete along with this harness when done.
+AUTOCLIP_HARNESS_SPEECH_KEY=
+AUTOCLIP_HARNESS_SPEECH_REGION=canadacentral
+AUTOCLIP_HARNESS_STORAGE_CONNECTION_STRING=
+AUTOCLIP_HARNESS_STORAGE_CONTAINER=autoclipper-batch
+AUTOCLIP_HARNESS_STORAGE_SAS_MINUTES=180
+AUTOCLIP_HARNESS_BATCH_ENDPOINT=
+AUTOCLIP_HARNESS_BATCH_VERSION=v3.2
+AUTOCLIP_HARNESS_BATCH_POLL_SECONDS=10
+AUTOCLIP_HARNESS_BATCH_TIMEOUT_MINUTES=45
+AUTOCLIP_HARNESS_LLM_URL=https://your-resource.openai.azure.com
+AUTOCLIP_HARNESS_LLM_KEY=
+AUTOCLIP_HARNESS_LLM_DEPLOYMENT=
+AUTOCLIP_HARNESS_LLM_VERSION=2024-02-15-preview
+AUTOCLIP_HARNESS_LANGUAGE=en-US
+AUTOCLIP_HARNESS_MAX_STORIES=5
diff --git a/tools/auto-clipper-harness/.gitignore b/tools/auto-clipper-harness/.gitignore
new file mode 100644
index 0000000000..91b68b99a7
--- /dev/null
+++ b/tools/auto-clipper-harness/.gitignore
@@ -0,0 +1,2 @@
+**/output/
+**/input/
\ No newline at end of file
diff --git a/tools/auto-clipper-harness/AutoClipperHarness.csproj b/tools/auto-clipper-harness/AutoClipperHarness.csproj
new file mode 100644
index 0000000000..e9f8fc814f
--- /dev/null
+++ b/tools/auto-clipper-harness/AutoClipperHarness.csproj
@@ -0,0 +1,12 @@ + + + Exe + net9.0 + enable + enable + + + +
diff --git a/tools/auto-clipper-harness/Program.cs b/tools/auto-clipper-harness/Program.cs
new file mode 100644
index 0000000000..58aa264191
--- /dev/null
+++ b/tools/auto-clipper-harness/Program.cs
@@ -0,0 +1,375 @@
+using System.Text;
+using System.Text.RegularExpressions;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using TNO.Services.AutoClipper.Audio;
+using TNO.Services.AutoClipper.Azure;
+using TNO.Services.AutoClipper.Config;
+using TNO.Services.AutoClipper.LLM;
+
+// TEMP HARNESS: delete this file/project once manual AutoClipper validation is complete.
+
+var input = args.FirstOrDefault();
+if (string.IsNullOrWhiteSpace(input) || !File.Exists(input))
+{
+    Console.WriteLine("Usage: dotnet run --project tools/auto-clipper-harness -- <audio-file> [language] [outputDir]");
+    return;
+}
+
+// TEMP HARNESS helper: try to load a .env file automatically so this console app can run standalone.
+// When removing this harness, remove the helper as well.
+var envFile = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_ENV_FILE")
+    ?? Path.Combine(AppContext.BaseDirectory, ".env");
+LoadEnvFile(envFile);
+if (!File.Exists(envFile))
+{
+    // dotnet run from repo root -> fall back to the project-relative .env
+    var fallback = Path.Combine(Directory.GetCurrentDirectory(), "tools", "auto-clipper-harness", ".env");
+    LoadEnvFile(fallback);
+}
+
+var outputDir = args.Length > 2 ? args[2] : Path.Combine(Path.GetDirectoryName(Path.GetFullPath(input)) ?? ".", "auto-clipper-harness-output");
+Directory.CreateDirectory(outputDir);
+
+using var loggerFactory = LoggerFactory.Create(builder => builder.AddSimpleConsole(o => o.TimestampFormat = "HH:mm:ss "));
+var stationCode = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_STATION") ?? "CKNW";
"CKNW"; +var stationConfigPath = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_STATION_PATH") + ?? Path.Combine(Directory.GetCurrentDirectory(), "services", "net", "auto-clipper", "Config", "Stations"); +var stationOptions = Options.Create(new AutoClipperOptions { StationConfigPath = stationConfigPath }); +var stationConfiguration = new StationConfigurationService(stationOptions, loggerFactory.CreateLogger()); +var stationProfile = stationConfiguration.GetProfile(stationCode); + +var language = args.Length > 1 + ? args[1] + : Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_LANGUAGE") + ?? (!string.IsNullOrWhiteSpace(stationProfile.Transcription.Language) ? stationProfile.Transcription.Language : "en-US"); +var sampleRate = int.TryParse(Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_SAMPLE_RATE"), out var sr) + ? sr + : (stationProfile.Transcription.SampleRate > 0 ? stationProfile.Transcription.SampleRate : 16000); + +var audioNormalizer = new AudioNormalizer(loggerFactory.CreateLogger()); +var workingFile = await audioNormalizer.NormalizeAsync(input, sampleRate); + +var options = Options.Create(new AutoClipperOptions +{ + AzureSpeechKey = RequireEnv("AUTOCLIP_HARNESS_SPEECH_KEY"), + AzureSpeechRegion = RequireEnv("AUTOCLIP_HARNESS_SPEECH_REGION"), + AzureSpeechBatchEndpoint = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_BATCH_ENDPOINT") ?? string.Empty, + AzureSpeechBatchApiVersion = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_BATCH_VERSION") ?? "v3.2", + AzureSpeechBatchPollingIntervalSeconds = int.TryParse(Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_BATCH_POLL_SECONDS"), out var batchPollSeconds) ? batchPollSeconds : 10, + AzureSpeechBatchTimeoutMinutes = int.TryParse(Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_BATCH_TIMEOUT_MINUTES"), out var batchTimeoutMinutes) ? batchTimeoutMinutes : 45, + AzureSpeechStorageConnectionString = RequireEnv("AUTOCLIP_HARNESS_STORAGE_CONNECTION_STRING"), + AzureSpeechStorageContainer = RequireEnv("AUTOCLIP_HARNESS_STORAGE_CONTAINER"), + AzureSpeechStorageSasExpiryMinutes = int.TryParse(Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_STORAGE_SAS_MINUTES"), out var sasMinutes) ? sasMinutes : 180, + LlmApiUrl = new Uri(RequireEnv("AUTOCLIP_HARNESS_LLM_URL")), + LlmApiKey = RequireEnv("AUTOCLIP_HARNESS_LLM_KEY"), + LlmPrompt = Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_PROMPT") + ?? (string.IsNullOrWhiteSpace(stationProfile.Text.LlmPrompt) ? string.Empty : stationProfile.Text.LlmPrompt), + MaxStoriesFromClip = int.TryParse(Environment.GetEnvironmentVariable("AUTOCLIP_HARNESS_MAX_STORIES"), out var maxStories) ? maxStories : 5, + VolumePath = Path.GetDirectoryName(Path.GetFullPath(input)) ?? ".", + DefaultTranscriptLanguage = stationProfile.Transcription.Language ?? 
"en-US" +}); + +var speechLogger = loggerFactory.CreateLogger(); +var llmLogger = loggerFactory.CreateLogger(); +var speechService = new AzureSpeechTranscriptionService(new HttpClient(), options, speechLogger); +var llmService = new ClipSegmentationService(new HttpClient(), options, llmLogger); + +var transcriptionRequest = new SpeechTranscriptionRequest +{ + Language = language, + EnableSpeakerDiarization = stationProfile.Transcription.Diarization, + SpeakerCount = stationProfile.Transcription.MaxSpeakers, + DiarizationMode = stationProfile.Transcription.DiarizationMode +}; + +Console.WriteLine($"[HARNESS] Transcribing {workingFile} ..."); +var segments = await speechService.TranscribeAsync(workingFile, transcriptionRequest, CancellationToken.None); +Console.WriteLine($"[HARNESS] Received {segments.Count} transcript segments"); + +var fullTranscriptBody = BuildTranscriptDocument(segments); +var fullTranscriptPath = Path.Combine(outputDir, "transcript_full.txt"); +await File.WriteAllTextAsync(fullTranscriptPath, fullTranscriptBody ?? string.Empty); +Console.WriteLine($"[HARNESS] Full transcript -> {fullTranscriptPath}"); + +var segmentationSettings = BuildSegmentationSettings(stationProfile); +Console.WriteLine("[HARNESS] Asking LLM for clip definitions ..."); +var promptDebugPath = Path.Combine(outputDir, "llm_prompt_debug.txt"); +await File.WriteAllTextAsync(promptDebugPath, BuildPromptDebug(segmentationSettings, segments)); +Console.WriteLine($"[HARNESS] Saved LLM prompt -> {promptDebugPath}"); +var clipDefinitions = (await llmService.GenerateClipsAsync(segments, segmentationSettings, CancellationToken.None)) + .OrderBy(c => c.Start) + .ToArray(); +Console.WriteLine($"[HARNESS] LLM returned {clipDefinitions.Length} clip candidates"); + +var index = 1; +foreach (var definition in clipDefinitions) +{ + var normalized = NormalizeClipDefinition(definition, segments); + if (normalized == null) + { + Console.WriteLine($"[HARNESS] Skip invalid clip {definition.Title}"); + continue; + } + + var transcriptSlice = ExtractTranscriptRange(segments, normalized.Start, normalized.End); + var transcriptBody = BuildTranscriptDocument(transcriptSlice); + if (string.IsNullOrWhiteSpace(transcriptBody)) + { + Console.WriteLine($"[HARNESS] Empty transcript for clip {definition.Title}"); + continue; + } + + var clipPath = await CreateClipFileAsync(input, outputDir, normalized.Start, normalized.End, index); + var transcriptPath = Path.Combine(outputDir, $"clip_{index:00}.txt"); + await File.WriteAllTextAsync(transcriptPath, transcriptBody); + Console.WriteLine($"[HARNESS] Saved clip #{index} ({normalized.Category}) -> {clipPath}\n[HARNESS] Transcript -> {transcriptPath}"); + index++; +} + +Console.WriteLine("[HARNESS] Complete."); + +static string BuildPromptDebug(ClipSegmentationSettings settings, IReadOnlyList segments) +{ + var builder = new StringBuilder(); + builder.AppendLine("Prompt Override:"); + builder.AppendLine(settings?.PromptOverride ?? ""); + builder.AppendLine(); + builder.AppendLine("Heuristic Patterns:"); + if (settings?.HeuristicPatternEntries != null && settings.HeuristicPatternEntries.Count > 0) + { + foreach (var entry in settings.HeuristicPatternEntries) + { + if (entry == null || string.IsNullOrWhiteSpace(entry.Pattern)) continue; + var weight = entry.Weight ?? settings.HeuristicBoundaryWeight ?? 
+
+static string BuildHeuristicHitReport(ClipSegmentationSettings? settings, IReadOnlyList<TimestampedTranscript> segments)
+{
+    if (segments == null || segments.Count == 0) return "<none>";
+    var patterns = ResolveHeuristicPatternDescriptions(settings);
+    if (patterns.Count == 0) return "<none>";
+
+    var hits = new List<string>();
+    foreach (var pattern in patterns)
+    {
+        try
+        {
+            var regex = new Regex(pattern.Pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
+            for (var i = 0; i < segments.Count; i++)
+            {
+                var sentence = segments[i];
+                if (string.IsNullOrWhiteSpace(sentence.Text)) continue;
+                if (regex.IsMatch(sentence.Text))
+                    hits.Add($"Sentence {i + 1} matches {pattern.Description}");
+            }
+        }
+        catch
+        {
+            continue;
+        }
+    }
+
+    return hits.Count == 0 ? "<none>" : string.Join(Environment.NewLine, hits);
+}
+
+static List<(string Pattern, string Description)> ResolveHeuristicPatternDescriptions(ClipSegmentationSettings? settings)
+{
+    var descriptions = new List<(string Pattern, string Description)>();
+    var entries = settings?.HeuristicPatternEntries;
+    if (entries != null && entries.Count > 0)
+    {
+        var baseWeight = settings?.HeuristicBoundaryWeight ?? 0;
+        foreach (var entry in entries)
+        {
+            if (entry == null || string.IsNullOrWhiteSpace(entry.Pattern)) continue;
+            var weight = entry.Weight ?? baseWeight;
+            var meta = new List<string>();
+            if (weight > 0) meta.Add($"w={weight:0.00}");
+            if (!string.IsNullOrWhiteSpace(entry.Category)) meta.Add($"cat={entry.Category}");
+            if (!string.IsNullOrWhiteSpace(entry.Note)) meta.Add(entry.Note!);
+            var description = $"pattern '{entry.Pattern}'";
+            if (meta.Count > 0) description += $" ({string.Join(", ", meta)})";
+            descriptions.Add((entry.Pattern, description));
+        }
+        return descriptions;
+    }
+
+    if (settings?.KeywordPatterns == null || settings.KeywordPatterns.Count == 0) return descriptions;
+    foreach (var pattern in settings.KeywordPatterns)
+    {
+        if (string.IsNullOrWhiteSpace(pattern)) continue;
+        descriptions.Add((pattern, $"pattern '{pattern}'"));
+    }
+    return descriptions;
+}
+
+static string BuildTranscriptDocument(IReadOnlyList<TimestampedTranscript> segments)
+{
+    if (segments == null || segments.Count == 0) return string.Empty;
+    var sb = new StringBuilder();
+    var idx = 1;
+    foreach (var segment in segments)
+    {
+        if (string.IsNullOrWhiteSpace(segment.Text)) continue;
+        sb.AppendLine(idx.ToString());
+        sb.AppendLine($"{FormatTimestamp(segment.Start)} --> {FormatTimestamp(segment.End)}");
+        sb.AppendLine(segment.Text.Trim());
+        sb.AppendLine();
+        idx++;
+    }
+    return sb.ToString().Trim();
+}
+
+static string FormatTimestamp(TimeSpan value) => string.Format("{0:00}:{1:00}:{2:00}.{3:000}", (int)value.TotalHours, value.Minutes, value.Seconds, value.Milliseconds);
+
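+// Note: clips are cut with ffmpeg's `-c copy` (no re-encode), so cut points snap
+// to the nearest keyframe and may differ slightly from the requested timestamps.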
+            var meta = new List<string>();
+            if (weight > 0) meta.Add($"w={weight:0.00}");
+            if (!string.IsNullOrWhiteSpace(entry.Category)) meta.Add($"cat={entry.Category}");
+            if (!string.IsNullOrWhiteSpace(entry.Note)) meta.Add(entry.Note!);
+            var description = $"pattern '{entry.Pattern}'";
+            if (meta.Count > 0) description += $" ({string.Join(", ", meta)})";
+            descriptions.Add((entry.Pattern, description));
+        }
+        return descriptions;
+    }
+
+    if (settings?.KeywordPatterns == null || settings.KeywordPatterns.Count == 0) return descriptions;
+    foreach (var pattern in settings.KeywordPatterns)
+    {
+        if (string.IsNullOrWhiteSpace(pattern)) continue;
+        descriptions.Add((pattern, $"pattern '{pattern}'"));
+    }
+    return descriptions;
+}
+
+static string BuildTranscriptDocument(IReadOnlyList<TranscriptSegment> segments)
+{
+    if (segments == null || segments.Count == 0) return string.Empty;
+    var sb = new StringBuilder();
+    var idx = 1;
+    foreach (var segment in segments)
+    {
+        if (string.IsNullOrWhiteSpace(segment.Text)) continue;
+        sb.AppendLine(idx.ToString());
+        sb.AppendLine($"{FormatTimestamp(segment.Start)} --> {FormatTimestamp(segment.End)}");
+        sb.AppendLine(segment.Text.Trim());
+        sb.AppendLine();
+        idx++;
+    }
+    return sb.ToString().Trim();
+}
+
+static string FormatTimestamp(TimeSpan value) => string.Format("{0:00}:{1:00}:{2:00}.{3:000}", (int)value.TotalHours, value.Minutes, value.Seconds, value.Milliseconds);
+
+static async Task<string> CreateClipFileAsync(string srcFile, string outputDir, TimeSpan start, TimeSpan end, int index)
+{
+    Directory.CreateDirectory(outputDir);
+    var dest = Path.Combine(outputDir, $"clip_{index:00}{Path.GetExtension(srcFile)}");
+    var durationSeconds = Math.Max(1, (end - start).TotalSeconds);
+    var process = new System.Diagnostics.Process();
+    if (IsWindows())
+    {
+        process.StartInfo.FileName = "cmd";
+        process.StartInfo.Arguments = $"/c ffmpeg -y -ss {start.TotalSeconds:0.###} -i \"{srcFile}\" -t {durationSeconds:0.###} -c copy \"{dest}\"";
+    }
+    else
+    {
+        process.StartInfo.FileName = "/bin/sh";
+        process.StartInfo.Arguments = $"-c \"ffmpeg -y -ss {start.TotalSeconds:0.###} -i '{srcFile}' -t {durationSeconds:0.###} -c copy '{dest}' 2>&1\"";
+    }
+    process.StartInfo.UseShellExecute = false;
+    process.StartInfo.RedirectStandardOutput = true;
+    process.StartInfo.CreateNoWindow = true;
+    process.Start();
+    var output = await process.StandardOutput.ReadToEndAsync();
+    await process.WaitForExitAsync();
+    if (process.ExitCode != 0) throw new InvalidOperationException($"ffmpeg failed: {output}");
+    return dest;
+}
+
+static bool IsWindows() => OperatingSystem.IsWindows();
+
+static ClipSegmentationSettings BuildSegmentationSettings(StationProfile profile)
+{
+    return new ClipSegmentationSettings
+    {
+        PromptOverride = string.IsNullOrWhiteSpace(profile.Text.LlmPrompt) ? null : profile.Text.LlmPrompt,
+        ModelOverride = string.IsNullOrWhiteSpace(profile.Text.LlmModel) ? null : profile.Text.LlmModel,
+        TemperatureOverride = profile.Text.LlmTemperature,
+        SystemPrompt = string.IsNullOrWhiteSpace(profile.Text.SystemPrompt) ? null : profile.Text.SystemPrompt,
+        PromptCharacterLimit = profile.Text.PromptCharacterLimit,
+        MaxStories = profile.Text.MaxStories,
+        KeywordPatterns = profile.Heuristics.KeywordPatterns?.ToArray(),
+        HeuristicPatternEntries = profile.Heuristics.PatternEntries?
+            .Where(p => p != null && !string.IsNullOrWhiteSpace(p.Pattern))
+            .Select(p => new HeuristicPatternSetting
+            {
+                Pattern = p.Pattern!,
+                Weight = p.Weight,
+                Category = string.IsNullOrWhiteSpace(p.Category) ? null : p.Category,
+                Note = p.Note
+            })
+            .ToArray(),
+        HeuristicBoundaryWeight = profile.Text.HeuristicBoundaryWeight,
+        KeywordCategories = profile.Text.KeywordCategories?.ToDictionary(kvp => kvp.Key, kvp => kvp.Value)
+    };
+}
+
+static string RequireEnv(string key)
+{
+    var value = Environment.GetEnvironmentVariable(key);
+    if (string.IsNullOrWhiteSpace(value)) throw new InvalidOperationException($"Environment variable '{key}' must be set for the AutoClipper harness.");
+    return value;
+}
+
+static void LoadEnvFile(string path)
+{
+    if (!File.Exists(path)) return;
+    foreach (var rawLine in File.ReadAllLines(path))
+    {
+        var line = rawLine.Trim();
+        if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#")) continue;
+        var separator = line.IndexOf('=');
+        if (separator <= 0) continue;
+        var key = line[..separator].Trim();
+        var value = line[(separator + 1)..].Trim();
+        Environment.SetEnvironmentVariable(key, value);
+    }
+}
+
+static ClipDefinition? NormalizeClipDefinition(ClipDefinition definition, IReadOnlyList<TranscriptSegment> segments)
+{
+    if (segments.Count == 0) return null;
+    var maxEnd = segments[^1].End;
+    var start = definition.Start < TimeSpan.Zero ? TimeSpan.Zero : definition.Start;
+    var end = definition.End > maxEnd ? maxEnd : definition.End;
+    if (end <= start) return null;
+
+    var first = segments.FirstOrDefault(s => s.End > start);
+    var last = segments.LastOrDefault(s => s.Start < end);
+    if (first == null || last == null) return null;
+    start = first.Start;
+    end = last.End;
+    return end <= start ? null : definition with { Start = start, End = end };
+}
+
+static IReadOnlyList<TranscriptSegment> ExtractTranscriptRange(IReadOnlyList<TranscriptSegment> segments, TimeSpan start, TimeSpan end)
+    => segments.Where(s => s.End > start && s.Start < end).ToArray();
diff --git a/tools/auto-clipper-harness/README.md b/tools/auto-clipper-harness/README.md
new file mode 100644
index 0000000000..9332233014
--- /dev/null
+++ b/tools/auto-clipper-harness/README.md
@@ -0,0 +1,23 @@
+# AutoClipper Harness
+
+The harness is a standalone console app that mirrors the AutoClipper pipeline for manual validation. It
+normalizes a local media file, runs Azure Speech transcription, feeds the transcript and station heuristics to the
+segmenter, and writes clips, transcripts, and prompt debug files for inspection.
+
+## Usage
+
+`dotnet run --project tools/auto-clipper-harness -- <input> [language] [outputDir]`
+
+- Configure Azure keys and LLM settings via `.env` (see `.env.sample`).
+- Provide `AUTOCLIP_HARNESS_STORAGE_CONNECTION_STRING` / `AUTOCLIP_HARNESS_STORAGE_CONTAINER` so the harness can upload audio for Azure batch transcription.
+- Optional overrides: `AUTOCLIP_HARNESS_BATCH_ENDPOINT`, `_BATCH_VERSION`, `_BATCH_POLL_SECONDS`, `_BATCH_TIMEOUT_MINUTES`, and `_STORAGE_SAS_MINUTES`.
+- Station profiles are loaded from `services/net/auto-clipper/Config/Stations` by default; override with
+  `AUTOCLIP_HARNESS_STATION_PATH` / `AUTOCLIP_HARNESS_STATION`.
+- Outputs: `clip_XX.*` media slices, `clip_XX.txt` transcripts, `transcript_full.txt`, and
+  `llm_prompt_debug.txt` (shows the numbered transcript, heuristic hits, and the final prompt).
+
+## Notes
+
+- The harness shares its segmentation logic with the service, so any change to `ClipSegmentationService`
+  should be validated here first.
+- Ensure `ffmpeg` is available on `PATH`; the harness shells out to `ffmpeg` to produce media clips.
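The transcript-segment and clip-definition types referenced throughout the harness are defined elsewhere in the repo and do not appear in this diff. A minimal sketch of the shapes the code above assumes (names and members are assumptions inferred from usage; `ClipDefinition` must be a record class for the `with` expression and null comparisons to compile):

```csharp
using System;

// Hypothetical sketch only; the real definitions live in the AutoClipper service.
public sealed record TranscriptSegment(TimeSpan Start, TimeSpan End, string Text);

public sealed record ClipDefinition
{
    public string? Title { get; init; }     // shown in harness console output
    public string? Category { get; init; }  // shown when a clip is saved
    public TimeSpan Start { get; init; }    // clamped/snapped by NormalizeClipDefinition
    public TimeSpan End { get; init; }
}
```

Under these assumptions, `NormalizeClipDefinition` snaps the LLM's proposed `Start`/`End` outward to the nearest overlapping segment boundaries, so a clip never begins or ends mid-sentence.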
diff --git a/tools/auto-clipper-harness/ResponseCaptureHandler.cs b/tools/auto-clipper-harness/ResponseCaptureHandler.cs
new file mode 100644
index 0000000000..ccf5e7bf7c
--- /dev/null
+++ b/tools/auto-clipper-harness/ResponseCaptureHandler.cs
@@ -0,0 +1,31 @@
+using System.IO;
+using System.Net.Http;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+internal sealed class ResponseCaptureHandler : DelegatingHandler
+{
+    private readonly string _path;
+
+    public ResponseCaptureHandler(string path) : base(new HttpClientHandler())
+    {
+        _path = path;
+    }
+
+    protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
+    {
+        var response = await base.SendAsync(request, cancellationToken).ConfigureAwait(false);
+        try
+        {
+            var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
+            await File.WriteAllTextAsync(_path, body, cancellationToken).ConfigureAwait(false);
+            response.Content = new StringContent(body, Encoding.UTF8, response.Content.Headers?.ContentType?.MediaType ?? "application/json");
+        }
+        catch
+        {
+        }
+
+        return response;
+    }
+}
diff --git a/tools/auto-clipper-harness/auto-clipper-harness.sln b/tools/auto-clipper-harness/auto-clipper-harness.sln
new file mode 100644
index 0000000000..1e503c9fc0
--- /dev/null
+++ b/tools/auto-clipper-harness/auto-clipper-harness.sln
@@ -0,0 +1,24 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.2.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AutoClipperHarness", "AutoClipperHarness.csproj", "{F4352B88-F210-DEBA-5586-EB19180A94EB}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{F4352B88-F210-DEBA-5586-EB19180A94EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F4352B88-F210-DEBA-5586-EB19180A94EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{F4352B88-F210-DEBA-5586-EB19180A94EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F4352B88-F210-DEBA-5586-EB19180A94EB}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {D27B7147-0BAE-4EEB-87B3-E2C580C9B07C}
+	EndGlobalSection
+EndGlobal
diff --git a/tools/scripts/gen-env-files.sh b/tools/scripts/gen-env-files.sh
index eb3161683d..4bce5f65ae 100755
--- a/tools/scripts/gen-env-files.sh
+++ b/tools/scripts/gen-env-files.sh
@@ -69,6 +69,7 @@ CONTENTMIGRATION_PORT=$portContentMigration
 INDEXING_PORT=$portIndexing
 IMAGE_PORT=$portImage
 TRANSCRIPTION_PORT=$portTranscription
+AUTO_CLIPPER_PORT=$portAutoClipper
 NLP_PORT=$portNlp
 CORENLP_PORT=$portCoreNlp
 NOTIFICATION_PORT=$portNotification
@@ -540,6 +541,45 @@ Kafka__BootstrapServers=host.docker.internal:$portKafkaBrokerAdvertisedExternal"
   echo "./services/net/transcription/.env created"
 fi
 
+## Auto Clipper Service
+if test -f "./services/net/auto-clipper/.env"; then
+  echo "./services/net/auto-clipper/.env exists"
+else
+echo \
+"ASPNETCORE_ENVIRONMENT=Development
+ASPNETCORE_URLS=http://+:8081
+
+Auth__Keycloak__Authority=http://host.docker.internal:$portKeycloak
+Auth__Keycloak__Audience=mmi-service-account
+Auth__Keycloak__Secret={YOU WILL NEED TO GET THIS FROM KEYCLOAK}
+Auth__OIDC__Token=/realms/mmi/protocol/openid-connect/token
+
+Service__ApiUrl=http://host.docker.internal:$portApi/api
+
+CHES__AuthUrl=https://dev.loginproxy.gov.bc.ca/auth/realms/comsvcauth/protocol/openid-connect/token
+CHES__HostUri=https://ches-dev.api.gov.bc.ca/api/v1
+CHES__Username={YOU WILL NEED TO GET THIS FROM CHES}
+CHES__Password={YOU WILL NEED TO GET THIS FROM CHES}
+CHES__EmailAuthorized=true
+# CHES__OverrideTo=
+
+Kafka__BootstrapServers=host.docker.internal:$portKafkaBrokerAdvertisedExternal
+
+# Configure Azure Speech Service
+Service__AzureSpeechKey={ENTER A VALID AZURE KEY}
+Service__AzureSpeechRegion=westus
+
+Service__AzureSpeechStorageConnectionString={Connection String}
+Service__AzureSpeechStorageContainer=batch-transcripts
+
+# Configure Azure OpenAI/Foundry LLM Service
+Service__LlmApiUrl=https://mmiopenai.cognitiveservices.azure.com
+Service__LlmApiKey={ENTER A VALID AZURE KEY}
+Service__LlmDefaultModel=gpt-5.1-chat
+Service__LlmPrompt=" >> ./services/net/auto-clipper/.env
+  echo "./services/net/auto-clipper/.env created"
+fi
+
 ## Indexing Service
 if test -f "./services/net/indexing/.env"; then
   echo "./services/net/indexing/.env exists"
diff --git a/tools/scripts/variables.sh b/tools/scripts/variables.sh
index 2c3e4fdc46..e9c7fa765d 100755
--- a/tools/scripts/variables.sh
+++ b/tools/scripts/variables.sh
@@ -173,6 +173,7 @@ export portContent=40025
 export portIndexing=40026
 export portTranscription=40027
 export portNlp=40028
+export portAutoClipper=40029
 export portCoreNlp=40038
 export portNotification=40030
 export portReporting=40031
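The `Service__*` keys written by gen-env-files.sh follow the standard .NET convention where a double underscore in an environment-variable name maps to a `:`-separated configuration section. A minimal sketch of how such keys surface through the environment-variable configuration provider (the binding code in the actual service is not shown in this diff, so this is illustrative only):

```csharp
using Microsoft.Extensions.Configuration;

// Build configuration from environment variables, as ASP.NET Core's default
// host builder does. "Service__AzureSpeechKey" becomes "Service:AzureSpeechKey".
var config = new ConfigurationBuilder()
    .AddEnvironmentVariables()
    .Build();

var speechKey = config["Service:AzureSpeechKey"];   // from Service__AzureSpeechKey
var llmModel = config["Service:LlmDefaultModel"];   // from Service__LlmDefaultModel
```

This is why the generated `.env` files can express a nested options hierarchy with flat variable names that Docker and shell environments can carry.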