From b401d6e390cc3163460244777826a8e5abc066d9 Mon Sep 17 00:00:00 2001 From: Piotr Konopka Date: Tue, 29 Apr 2025 09:46:37 +0200 Subject: [PATCH] Remove Calculators They were needed for some theoretical considerations in my PhD, no need to keep them anymore. --- Framework/CMakeLists.txt | 7 - .../include/QualityControl/Calculators.h | 56 ------- Framework/src/Calculators.cxx | 144 ------------------ Framework/src/runLocationCalculator.cxx | 125 --------------- Framework/src/runMergerCalculator.cxx | 86 ----------- 5 files changed, 418 deletions(-) delete mode 100644 Framework/include/QualityControl/Calculators.h delete mode 100644 Framework/src/Calculators.cxx delete mode 100644 Framework/src/runLocationCalculator.cxx delete mode 100644 Framework/src/runMergerCalculator.cxx diff --git a/Framework/CMakeLists.txt b/Framework/CMakeLists.txt index 7666082d51..9b9961b5d6 100644 --- a/Framework/CMakeLists.txt +++ b/Framework/CMakeLists.txt @@ -116,7 +116,6 @@ add_library(O2QualityControl src/UpdatePolicyManager.cxx src/AdvancedWorkflow.cxx src/QualitiesToFlagCollectionConverter.cxx - src/Calculators.cxx src/DataSourceSpec.cxx src/RootFileSink.cxx src/RootFileSource.cxx @@ -201,8 +200,6 @@ set(EXE_SRCS src/runRepositoryBenchmark.cxx src/runPostProcessing.cxx src/runPostProcessingOCC.cxx - src/runLocationCalculator.cxx - src/runMergerCalculator.cxx src/runUploadRootObjects.cxx src/runFileMerger.cxx src/runMetadataUpdater.cxx @@ -219,8 +216,6 @@ set(EXE_NAMES o2-qc-repository-benchmark o2-qc-run-postprocessing o2-qc-run-postprocessing-occ - o2-qc-location-calculator - o2-qc-merger-calculator o2-qc-upload-root-objects o2-qc-file-merger o2-qc-metadata-updater @@ -239,8 +234,6 @@ set(EXE_OLD_NAMES repositoryBenchmark qcRunPostProcessing qcRunPostProcessingOCC - o2-qc-location-calculator - o2-qc-merger-calculator o2-qc-upload-root-objects o2-qc-file-merger o2-qc-metadata-updater diff --git a/Framework/include/QualityControl/Calculators.h b/Framework/include/QualityControl/Calculators.h deleted file mode 100644 index 6efe55e380..0000000000 --- a/Framework/include/QualityControl/Calculators.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// -/// \file Calculators.h -/// \author Piotr Konopka -/// -/// \brief Bunch of formulas for theoretical calculations for finding optimal QC topologies. - -#ifndef QUALITYCONTROL_CALCULATORS_H -#define QUALITYCONTROL_CALCULATORS_H - -#include -#include - -namespace o2::quality_control::calculators -{ - -// average M/D/1 queue size, rho is server utilisation ( input rate / processing rate ) -double averageMD1Queue(double rho); - -// average M/G/1 queue size -// rho is server utilisation ( input rate / processing rate ) -// mean is the mean processing time -// stddev is the standard deviation of the processing time -double averageMG1Queue(double rho, double mean, double stddev); - -// number of merger layes, M0 is number of producers, R is max reduction factor -size_t numberOfMergerLayers(size_t M0, size_t R); -double mergersMemoryUsage(size_t R, size_t M0, size_t objSize, double T, const std::function& performance); - -double mergersCpuUsage(size_t R, size_t M0, double T, const std::function& performance); - -// returns the cost of CPU and RAM of the full merger topology -std::tuple mergerCosts(double costCPU, double costRAM, size_t R, int parallelism, int mosSize, - double cycleDuration, const std::function& performance); - -// Returns the best Reduction factor (R) for given conditions and total cost of CPU and RAM. -// If there is a range of equally good reduction factors, it will return the highest. -std::tuple cheapestMergers(double costCPU, double costRAM, int parallelism, int mosSize, - double cycleDuration, const std::function& performance); - -double qcTaskInputMemory(double utilisation, double avgInputMessage, double stddevInputMessage); - -double qcTaskCost(double costCPU, double costRAM, double qcTaskCPU, size_t qcTaskRAM, double parallelData, double avgInputMessage, double stddevInputMessage); - -} // namespace o2::quality_control::calculators -#endif //QUALITYCONTROL_CALCULATORS_H diff --git a/Framework/src/Calculators.cxx b/Framework/src/Calculators.cxx deleted file mode 100644 index 8107507e7a..0000000000 --- a/Framework/src/Calculators.cxx +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// -/// \file Calculators.cxx -/// \author Piotr Konopka -/// -/// \brief Bunch of formulas for theoretical calculations for finding optimal QC topologies. -#include "QualityControl/Calculators.h" -#include - -namespace o2::quality_control::calculators -{ - -// average M/D/1 queue size, rho is server utilisation ( input rate / processing rate ) -double averageMD1Queue(double rho) -{ - return rho < 1 ? rho * rho / 2.0 / (1.0 - rho) : std::numeric_limits::infinity(); -} - -// average M/G/1 queue size -// rho is server utilisation ( input rate / processing rate ) -// mean is the mean processing time -// stddev is the standard deviation of the processing time -double averageMG1Queue(double rho, double mean, double stddev) -{ - return rho < 1 ? rho * rho / 2.0 / (1.0 - rho) * (1 + (stddev * stddev) / (mean * mean)) : std::numeric_limits::infinity(); -} - -// number of merger layes, M0 is number of producers, R is max reduction factor -size_t numberOfMergerLayers(size_t M0, size_t R) -{ - // we benefit from the fact that log_a(b) = log_c(b) / log_c(a) - return std::ceil(std::log((double)M0) / std::log((double)R)); -} - -double mergersMemoryUsage(size_t R, size_t M0, size_t objSize, double T, const std::function& performance) -{ - const size_t layers = numberOfMergerLayers(M0, R); - - double averageObjects = 0; - size_t Mi = M0; - - for (size_t layer = 1; layer <= layers; layer++) { - const size_t Mi_prev = Mi; - Mi = std::ceil(Mi_prev / (double)R); - const double Ri = Mi_prev / (double)Mi; - const double rho = Ri / (double)T / performance(Ri); - - if (rho >= 1) { - // if utilisation becomes > 1, then the queue will grow infinitely. - averageObjects = std::numeric_limits::infinity(); - break; - } - - averageObjects += Mi * (averageMD1Queue(rho) + rho + 1); // should it be "2" actually? Average entities in the system + merged object is better - } - - double memory = averageObjects * objSize; - return memory; -} - -double mergersCpuUsage(size_t R, size_t M0, double T, const std::function& performance) -{ - const size_t layers = numberOfMergerLayers(M0, R); - double cores = 0.0; - - size_t Mi = M0; - for (size_t layer = 1; layer <= layers; layer++) { - const size_t Mi_prev = Mi; - const size_t Mi = std::ceil(Mi_prev / (double)R); - const double Ri = Mi_prev / (double)Mi; - const double rho = Ri / (double)T / performance(Ri); - - if (rho >= 1) { - cores = std::numeric_limits::infinity(); - break; - } - - cores += Mi * rho; - } - - return cores; -} - -// returns the cost of CPU and RAM of the full merger topology -std::tuple mergerCosts(double costCPU, double costRAM, size_t R, int parallelism, int mosSize, - double cycleDuration, const std::function& performance) -{ - double mergersCPUCost = costCPU * mergersCpuUsage(R, parallelism, cycleDuration, performance); - double mergersfMemoryCost = costRAM * mergersMemoryUsage(R, parallelism, mosSize, cycleDuration, performance); - return { mergersCPUCost, mergersfMemoryCost }; -} - -// Returns the best Reduction factor (R) for given conditions and total cost of CPU and RAM. -// If there is a range of equally good reduction factors, it will return the highest. -std::tuple cheapestMergers(double costCPU, double costRAM, int parallelism, int mosSize, - double cycleDuration, const std::function& performance) -{ - size_t bestR = -1; - double lowestCPUCost = std::numeric_limits::max(); - double lowestRAMCost = std::numeric_limits::max(); - double lowestTotalCost = std::numeric_limits::max(); - for (size_t R = 2; R <= (size_t)parallelism; R++) { - auto [costOfCPU, costOfMemory] = mergerCosts(costCPU, costRAM, R, parallelism, mosSize, cycleDuration, performance); - double totalCost = costOfMemory + costOfCPU; - // ILOG(Info) << "R: " << R << " total cost: " << totalCost << ENDM; - - if (totalCost <= lowestTotalCost) { - lowestTotalCost = totalCost; - lowestCPUCost = costOfCPU; - lowestRAMCost = costOfMemory; - bestR = R; - } - } - return { bestR, lowestCPUCost, lowestRAMCost }; -} - -double qcTaskInputMemory(double utilisation, double avgInputMessage, double stddevInputMessage) -{ - // we can use avgInputMessage and stddevInputMessage (which are in Bytes) instead of processing times, - // because we assume that processing time is proportional to message sizes, this task throughput would cancel out, - // being both in numerator and denominator. - return avgInputMessage * (averageMG1Queue(utilisation, avgInputMessage, stddevInputMessage) + utilisation); -} - -double qcTaskCost(double costCPU, double costRAM, double qcTaskCPU, size_t qcTaskRAM, - double parallelData, double avgInputMessage, double stddevInputMessage) -{ - auto utilisation = parallelData * qcTaskCPU; - auto inputMemory = qcTaskInputMemory(utilisation, avgInputMessage, stddevInputMessage); - - return costCPU * utilisation + costRAM * (inputMemory + qcTaskRAM); -} - -} // namespace o2::quality_control::calculators \ No newline at end of file diff --git a/Framework/src/runLocationCalculator.cxx b/Framework/src/runLocationCalculator.cxx deleted file mode 100644 index 4e7298a81b..0000000000 --- a/Framework/src/runLocationCalculator.cxx +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// -/// \file runLocationCalculator.cxx -/// \author Piotr Konopka -/// -/// \brief This is a calculator for the tasks cost when it is run locally and remotely - -#include "QualityControl/Calculators.h" -#include - -#include - -using namespace o2::quality_control::calculators; -namespace bpo = boost::program_options; - -int main(int argc, const char* argv[]) -{ - try { - bpo::options_description desc{ "Options" }; - desc.add_options() // - ("help,h", "Help screen") // - ("cost-cpu,cp", bpo::value()->default_value(118.0), "Cost of CPU [currency/CPU]") // - ("cost-bandwidth,cb", bpo::value()->default_value(0.76), "Cost of bandwidth [currency/MB/s]") // - ("cost-ram,cm", bpo::value()->default_value(0.005), "Cost of RAM [currency/MB]") // - ("parallelism,p", bpo::value()->default_value(500), "Number of parallel nodes []") // - ("parallel-data,D", bpo::value()->default_value(0.19), "Parallel data stream size (QC Task input) [MB/s]") // - ("avg-data-message,Dm", bpo::value()->default_value(1), "Average data message size [MB]") // - ("stddev-data-message,Ds", bpo::value()->default_value(1), "Std dev of data message size [MB]") // - ("mos-size,mo", bpo::value()->default_value(100), "Size of all MonitorObjects produced by one QC Task [MB]") // - ("cycle-duration,T", bpo::value()->default_value(60.0), "Cycle duration [s]") // - ("qc-task-cpu,qp", bpo::value()->default_value(0.01), "CPU usage of a QC Task per data throughput [CPU/MB/s]") // - ("qc-task-ram,qm", bpo::value()->default_value(250), "RAM usage of an idle QC Task [MB]") // - ("merger-performance,mu", bpo::value()->default_value(25.0), "Number of objects per second which can be merged by one Merger"); - - bpo::variables_map vm; - store(parse_command_line(argc, argv, desc), vm); - notify(vm); - - if (vm.count("help")) { - std::cout << desc << std::endl; - return 0; - } - - const auto costCPU = vm["cost-cpu"].as(); - const auto costBandwidth = vm["cost-bandwidth"].as(); - const auto costRAM = vm["cost-ram"].as(); - const auto parallelism = vm["parallelism"].as(); - const auto parallelData = vm["parallel-data"].as(); - const auto avgDataMessage = vm["avg-data-message"].as(); - const auto stdDevDataMessage = vm["stddev-data-message"].as(); - const auto mosSize = vm["mos-size"].as(); - const auto cycleDuration = vm["cycle-duration"].as(); - const auto qcTaskCPU = vm["qc-task-cpu"].as(); - const auto qcTaskRAM = vm["qc-task-ram"].as(); - const auto mergerPerformance = vm["merger-performance"].as(); - - std::cout << "PARAMETERS" << std::endl; - std::cout << "costCPU, " << costCPU << std::endl; - std::cout << "costBandwidth, " << costBandwidth << std::endl; - std::cout << "costRAM, " << costRAM << std::endl; - std::cout << "parallelism, " << parallelism << std::endl; - std::cout << "parallelData, " << parallelData << std::endl; - std::cout << "avgDataMessage, " << avgDataMessage << std::endl; - std::cout << "stdDevDataMessage, " << stdDevDataMessage << std::endl; - std::cout << "mosSize, " << mosSize << std::endl; - std::cout << "cycleDuration, " << cycleDuration << std::endl; - std::cout << "qcTaskCPU, " << qcTaskCPU << std::endl; - std::cout << "qcTaskRAM, " << qcTaskRAM << std::endl; - std::cout << "mergerPerformance, " << mergerPerformance << std::endl; - std::cout << std::endl; - - // Computing the local variant - { - auto localQCTaskCost = qcTaskCost(costCPU, costRAM, qcTaskCPU, qcTaskRAM, parallelData, avgDataMessage, stdDevDataMessage); - auto localCost = parallelism * localQCTaskCost; - - auto transportCost = costBandwidth * parallelism * mosSize / cycleDuration; - - // todo: allow to specify R in arguments - auto [R, costCPUMergers, costRAMMergers] = cheapestMergers(costCPU, costRAM, parallelism, mosSize, cycleDuration, [mergerPerformance](double) { return mergerPerformance; }); - auto remoteCost = costCPUMergers + costRAMMergers; - auto totalCost = localCost + transportCost + remoteCost; - - std::cout << "RESULTS LOCAL" << std::endl; - std::cout << "R, " << R << std::endl; - std::cout << "localCost, " << localCost << std::endl; - std::cout << "transportCost, " << transportCost << std::endl; - std::cout << "remoteCost, " << remoteCost << std::endl; - std::cout << "totalCost, " << totalCost << std::endl; - } - - // Computing the remote variant - { - auto localCost = 0.0; - auto transportCost = costBandwidth * parallelism * parallelData; - auto remoteCost = qcTaskCost(costCPU, costRAM, qcTaskCPU, qcTaskRAM, parallelism * parallelData, avgDataMessage, stdDevDataMessage); - auto totalCost = localCost + transportCost + remoteCost; - - std::cout << "RESULTS REMOTE" << std::endl; - std::cout << "localCost, " << localCost << std::endl; - std::cout << "transportCost, " << transportCost << std::endl; - std::cout << "remoteCost, " << remoteCost << std::endl; - std::cout << "totalCost, " << totalCost << std::endl; - } - - return 0; - } catch (const bpo::error& ex) { - std::cerr << "Exception caught: " << ex.what() << std::endl; - return 1; - } - - return 0; -} - -// -0.378347x + 2782.30 \ No newline at end of file diff --git a/Framework/src/runMergerCalculator.cxx b/Framework/src/runMergerCalculator.cxx deleted file mode 100644 index 030efe5c21..0000000000 --- a/Framework/src/runMergerCalculator.cxx +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// -/// \file runMergerCalculator.cxx -/// \author Piotr Konopka -/// -/// \brief This is a calculator for finding an optimal reduction factor for Mergers - -#include "QualityControl/QcInfoLogger.h" -#include "QualityControl/Calculators.h" -#include - -using namespace o2::quality_control::core; -using namespace o2::quality_control::calculators; -namespace bpo = boost::program_options; - -int main(int argc, const char* argv[]) -{ - try { - bpo::options_description desc{ "Options" }; - desc.add_options() // - ("help,h", "Help screen") // - ("cost-cpu,cp", bpo::value()->default_value(118.0), "Cost of CPU [currency/CPU]") // - ("cost-ram,cm", bpo::value()->default_value(0.0065), "Cost of RAM [currency/MB]") // - ("parallelism,p", bpo::value()->default_value(2500), "Number of parallel nodes []") // - ("mos-size,mo", bpo::value()->default_value(500), "Size of all MonitorObjects produced by one QC Task [MB]") // - ("cycle-duration,T", bpo::value()->default_value(60.0), "Cycle duration [s]") // - ("merger-performance,mu", bpo::value()->default_value(25.0), "Number of objects per second which can be merged by one Merger"); - - bpo::variables_map vm; - store(parse_command_line(argc, argv, desc), vm); - notify(vm); - - if (vm.count("help")) { - std::cout << desc << std::endl; - return 0; - } - - const auto costCPU = vm["cost-cpu"].as(); - const auto costRAM = vm["cost-ram"].as(); - const auto parallelism = vm["parallelism"].as(); - const auto mosSize = vm["mos-size"].as(); - const auto cycleDuration = vm["cycle-duration"].as(); - const auto mergerPerformance = vm["merger-performance"].as(); - - std::cout << "PARAMETERS" << std::endl; - std::cout << "costCPU, " << costCPU << std::endl; - std::cout << "costRAM, " << costRAM << std::endl; - std::cout << "parallelism, " << parallelism << std::endl; - std::cout << "mosSize, " << mosSize << std::endl; - std::cout << "cycleDuration, " << cycleDuration << std::endl; - std::cout << "mergerPerformance, " << mergerPerformance << std::endl; - - auto performance = [=](double /* Ri */) { - // We assume the same performance regardless of the number of inputs, - // but we could do something like this: - // return -0.002 * Ri + 24; - return mergerPerformance; - }; - - std::cout << "RESULTS" << std::endl; - std::cout << "R , costOfMemory , costOfCPU" << std::endl; - for (size_t R = 2; R <= (size_t)parallelism; R++) { - double costOfMemory = costRAM * mergersMemoryUsage(R, parallelism, mosSize, cycleDuration, performance); - double costOfCPU = costCPU * mergersCpuUsage(R, parallelism, cycleDuration, performance); - double totalCost = costOfMemory + costOfCPU; - std::cout << R << " , " << costOfMemory << " , " << costOfCPU << " , " << totalCost << std::endl; - } - - return 0; - } catch (const bpo::error& ex) { - std::cerr << "Exception caught: " << ex.what() << std::endl; - return 1; - } - - return 0; -} \ No newline at end of file