From d0447a9207d23173d06fbd3ed81af20796ac3476 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 20 Apr 2021 16:31:13 +0530 Subject: [PATCH 01/16] Initial changes --- src/k8s-troubleshoot/HISTORY.rst | 8 + src/k8s-troubleshoot/README.rst | 5 + .../azext_k8s_troubleshoot/__init__.py | 32 +++ .../azext_k8s_troubleshoot/_client_factory.py | 20 ++ .../azext_k8s_troubleshoot/_constants.py | 19 ++ .../azext_k8s_troubleshoot/_help.py | 38 ++++ .../azext_k8s_troubleshoot/_params.py | 22 ++ .../azext_k8s_troubleshoot/_utils.py | 209 ++++++++++++++++++ .../azext_k8s_troubleshoot/_validators.py | 20 ++ .../azext_metadata.json | 3 + .../azext_k8s_troubleshoot/commands.py | 29 +++ .../azext_k8s_troubleshoot/custom.py | 90 ++++++++ .../azext_k8s_troubleshoot/tests/__init__.py | 5 + .../tests/latest/__init__.py | 5 + .../latest/test_k8s-troubleshoot_scenario.py | 40 ++++ src/k8s-troubleshoot/setup.cfg | 2 + src/k8s-troubleshoot/setup.py | 60 +++++ 17 files changed, 607 insertions(+) create mode 100644 src/k8s-troubleshoot/HISTORY.rst create mode 100644 src/k8s-troubleshoot/README.rst create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py create mode 100644 src/k8s-troubleshoot/setup.cfg create mode 100644 src/k8s-troubleshoot/setup.py diff --git a/src/k8s-troubleshoot/HISTORY.rst b/src/k8s-troubleshoot/HISTORY.rst new file mode 100644 index 00000000000..8c34bccfff8 --- /dev/null +++ b/src/k8s-troubleshoot/HISTORY.rst @@ -0,0 +1,8 @@ +.. :changelog: + +Release History +=============== + +0.1.0 +++++++ +* Initial release. \ No newline at end of file diff --git a/src/k8s-troubleshoot/README.rst b/src/k8s-troubleshoot/README.rst new file mode 100644 index 00000000000..3f7ec4b808e --- /dev/null +++ b/src/k8s-troubleshoot/README.rst @@ -0,0 +1,5 @@ +Microsoft Azure CLI 'k8s-troubleshoot' Extension +========================================== + +This package is for the 'k8s-troubleshoot' extension. +i.e. 'az k8s-troubleshoot' \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py new file mode 100644 index 00000000000..61e44675a3d --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py @@ -0,0 +1,32 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +from azure.cli.core import AzCommandsLoader + +from azext_k8s_troubleshoot._help import helps # pylint: disable=unused-import + + +class K8s_troubleshootCommandsLoader(AzCommandsLoader): + + def __init__(self, cli_ctx=None): + from azure.cli.core.commands import CliCommandType + from azext_k8s_troubleshoot._client_factory import cf_k8s_troubleshoot + k8s-troubleshoot_custom = CliCommandType( + operations_tmpl='azext_k8s-troubleshoot.custom#{}', + client_factory=cf_k8s-troubleshoot) + super(K8s-troubleshootCommandsLoader, self).__init__(cli_ctx=cli_ctx, + custom_command_type=k8s-troubleshoot_custom) + + def load_command_table(self, args): + from azext_k8s_troubleshoot.commands import load_command_table + load_command_table(self, args) + return self.command_table + + def load_arguments(self, command): + from azext_k8s-troubleshoot._params import load_arguments + load_arguments(self, command) + + +COMMAND_LOADER_CLS = K8s_troubleshootCommandsLoader diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py new file mode 100644 index 00000000000..5d54bed4404 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py @@ -0,0 +1,20 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + + +from azure.cli.core.commands.client_factory import get_mgmt_service_client + + +def _resource_client_factory(cli_ctx, subscription_id=None): + return get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES, subscription_id=subscription_id) + + +def _resource_providers_client(cli_ctx): + from azure.mgmt.resource import ResourceManagementClient + return get_mgmt_service_client(cli_ctx, ResourceManagementClient).providers + + # Alternate: This should also work + # subscription_id = get_subscription_id(cli_ctx) + # return get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES, subscription_id=subscription_id).providers diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py new file mode 100644 index 00000000000..dca9654731c --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py @@ -0,0 +1,19 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + + +# pylint: disable=line-too-long + +Connected_Cluster_Provider_Namespace = 'Microsoft.Kubernetes' +Kubernetes_Configuration_Provider_Namespace = 'Microsoft.KubernetesConfiguration' +Custom_Locations_Provider_Namespace = 'Microsoft.ExtendedLocation' +DEFAULT_REQUEST_TIMEOUT = 10 # seconds + +# Custom fault types + +Load_Kubeconfig_Fault_Type = "Error while loading kubeconfig" + +# URL constants +Kubernetes_Github_Latest_Release_Uri = "https://api.github.com/repos/kubernetes/kubernetes/releases/latest" \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py new file mode 100644 index 00000000000..af7d1ea7365 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py @@ -0,0 +1,38 @@ +# coding=utf-8 +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +from knack.help_files import helps # pylint: disable=unused-import + + +helps['k8s-troubleshoot'] = """ + type: group + short-summary: Commands to manage K8s-troubleshoots. +""" + +helps['k8s-troubleshoot create'] = """ + type: command + short-summary: Create a K8s-troubleshoot. +""" + +helps['k8s-troubleshoot list'] = """ + type: command + short-summary: List K8s-troubleshoots. +""" + +# helps['k8s-troubleshoot delete'] = """ +# type: command +# short-summary: Delete a K8s-troubleshoot. +# """ + +# helps['k8s-troubleshoot show'] = """ +# type: command +# short-summary: Show details of a K8s-troubleshoot. +# """ + +# helps['k8s-troubleshoot update'] = """ +# type: command +# short-summary: Update a K8s-troubleshoot. +# """ diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py new file mode 100644 index 00000000000..c4ce0482c5e --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py @@ -0,0 +1,22 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- +# pylint: disable=line-too-long + +import os.path +from azure.cli.core.commands.parameters import get_location_type, file_type +from azure.cli.core.commands.validators import get_default_location_from_resource_group + + +def load_arguments(self, _): + + with self.argument_context('k8s-troubleshoot diagnose') as c: + c.argument('location', arg_type=get_location_type(self.cli_ctx), validator=get_default_location_from_resource_group) + c.argument('cluster_name', options_list=['--name', '-n'], id_part='name', help='The name of the connected cluster.') + c.argument('kube_config', options_list=['--kube-config'], help='Path to the kube config file.') + c.argument('kube_context', options_list=['--kube-context'], help='Kubconfig context from current machine.') + c.argument('storage_account', options_list=['--storage-account'], help='Name or ID of the storage account to save the diagnostic information') + c.argument('sas_token', options_list=['--sas-token'], help='The SAS token with writable permission for the storage account.') + c.argument('output_file', options_list=['--output-file'], type=file_type, default=os.path.join(os.path.expanduser('~'), '.azure', 'az_connectedk8s_troubleshoot_output.tar.gz'), help="Output zipped file path for the logs collected during troubleshoot.") + diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py new file mode 100644 index 00000000000..c5f45f62e5a --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -0,0 +1,209 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +from kubernetes import client as kube_client, config +from azure.cli.core import telemetry +from azure.cli.core.azclierror import FileOperationError +from knack.log import get_logger +import os +import logging +import requests +import json +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +import azext_k8s_troubleshoot._constants as consts +from azext_k8s_troubleshoot._client_factory import get_subscription_client, _resource_providers_client + +logger = get_logger(__name__) + + +class TimeoutHTTPAdapter(HTTPAdapter): + def __init__(self, *args, **kwargs): + self.timeout = consts.DEFAULT_REQUEST_TIMEOUT + if "timeout" in kwargs: + self.timeout = kwargs["timeout"] + del kwargs["timeout"] + super().__init__(*args, **kwargs) + + def send(self, request, **kwargs): + timeout = kwargs.get("timeout") + if timeout is None: + kwargs["timeout"] = self.timeout + return super().send(request, **kwargs) + + +def setup_logger(logger_name, log_file, level=logging.DEBUG): + loggr = logging.getLogger(logger_name) + formatter = logging.Formatter('%(asctime)s : %(levelname)s : %(message)s') + fileHandler = logging.FileHandler(log_file, mode='w') + fileHandler.setFormatter(formatter) + + loggr.setLevel(level) + loggr.addHandler(fileHandler) + + +def set_kube_config(kube_config): + if kube_config: + # Trim kubeconfig. This is required for windows os. + if (kube_config.startswith("'") or kube_config.startswith('"')): + kube_config = kube_config[1:] + if (kube_config.endswith("'") or kube_config.endswith('"')): + kube_config = kube_config[:-1] + return kube_config + return None + + +def load_kube_config(kube_config, kube_context): + try: + config.load_kube_config(config_file=kube_config, context=kube_context) + except Exception as e: + telemetry.set_exception(exception=e, fault_type=consts.Load_Kubeconfig_Fault_Type, + summary='Problem loading the kubeconfig file') + raise FileOperationError("Problem loading the kubeconfig file." + str(e)) + + +def get_latest_extension_version(extension_name='connectedk8s'): + try: + import re + git_url = "https://raw.githubusercontent.com/Azure/azure-cli-extensions/master/src/{}/setup.py".format(extension_name) + response = requests.get(git_url, timeout=10) + if response.status_code != 200: + logger.info("Failed to fetch the latest version from '%s' with status code '%s' and reason '%s'", + git_url, response.status_code, response.reason) + return None + for line in response.iter_lines(): + txt = line.decode('utf-8', errors='ignore') + if txt.startswith('VERSION'): + match = re.search(r'VERSION = \'(.*)\'$', txt) + if match: + return match.group(1) + else: + match = re.search(r'VERSION = \"(.*)\"$', txt) + if match: + return match.group(1) + return None + except Exception as ex: # pylint: disable=broad-except + logger.info("Failed to get the latest version from '%s'. %s", git_url, str(ex)) + return None + + +def get_existing_extension_version(extension_name='connectedk8s'): + from azure.cli.core.extension import get_extensions + extensions = get_extensions() + if extensions: + for ext in extensions: + if ext.name == extension_name: + return ext.version or 'Unknown' + + return 'NotFound' + + +def check_connectivity(url='https://example.org', max_retries=5, timeout=1): + import timeit + start = timeit.default_timer() + success = None + try: + with requests.Session() as s: + s.mount(url, requests.adapters.HTTPAdapter(max_retries=max_retries)) + s.head(url, timeout=timeout) + success = True + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as ex: + logger.info('Connectivity problem detected.') + logger.debug(ex) + success = False + stop = timeit.default_timer() + logger.debug('Connectivity check: %s sec', stop - start) + return success + + +def get_latest_kubernetes_version(): + retries = Retry(total=3, backoff_factor=1, status_forcelist=[413, 429, 500, 502, 503, 504]) + req_session = requests.Session() + adapter = TimeoutHTTPAdapter(max_retries=retries) + req_session.mount("https://", adapter) + req_session.mount("http://", adapter) + + url = consts.Kubernetes_Github_Latest_Release_Uri + + payload = {} + headers = {'Accept': 'application/vnd.github.v3+json'} + try: + response = req_session.request("GET", url, headers=headers, data=payload) + if response.status_code == 200: + latest_release = json.loads(response.text) + return latest_release["tag_name"] + else: + logger.warning("Couldn't fetch the latest kubernetes stable release information. Response status code: {}".format(response.status_code)) + except Exception as e: + logger.warning("Couldn't fetch the latest kubernetes stable release information. Error: " + str(e)) + + return None + + +def validate_azure_management_reachability(subscription_id, custom_logger): + try: + get_subscription_client().get(subscription_id) + except Exception as ex: + custom_logger.warning("Not able to reach azure management endpoints. Exception: " + str(ex)) + + +def check_system_permissions(custom_logger): + try: + import tempfile + chart_export_path = os.path.join(os.path.expanduser('~'), '.azure', 'AzureArcCharts') + os.makedirs(chart_export_path, exist_ok=True) + with tempfile.TemporaryDirectory(dir=chart_export_path): + return True + except (OSError, EnvironmentError): + return False + except Exception as ex: + custom_logger.debug("Couldn't check the system permissions for creating an azure arc charts directory. Error: {}".format(str(ex)), exc_info=True) + return False + + +def check_provider_registrations(cli_ctx, custom_logger): + try: + rp_client = _resource_providers_client(cli_ctx) + cc_registration_state = rp_client.get(consts.Connected_Cluster_Provider_Namespace).registration_state + if cc_registration_state != "Registered": + custom_logger.error("{} provider is not registered".format(consts.Connected_Cluster_Provider_Namespace)) + kc_registration_state = rp_client.get(consts.Kubernetes_Configuration_Provider_Namespace).registration_state + if kc_registration_state != "Registered": + custom_logger.error("{} provider is not registered".format(consts.Kubernetes_Configuration_Provider_Namespace)) + except Exception as ex: + custom_logger.debug("Couldn't check the required provider's registration status. Error: {}".format(str(ex)), exc_info=True) + + +# Returns a list of kubernetes pod objects in a given namespace. Object description at: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodList.md +def get_pod_list(api_instance, namespace, label_selector="", field_selector=""): + try: + return api_instance.list_namespaced_pod(namespace, label_selector=label_selector, field_selector="") + except Exception as e: + logger.debug("Error occurred when retrieving pod information: " + str(e)) + + +def check_linux_amd64_node(configuration, custom_logger=None): + api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration)) + try: + api_response = api_instance.list_node() + for item in api_response.items: + node_arch = item.metadata.labels.get("kubernetes.io/arch") + node_os = item.metadata.labels.get("kubernetes.io/os") + if node_arch == "amd64" and node_os == "linux": + return True + except Exception as e: # pylint: disable=broad-except + if custom_logger: + custom_logger.error("Error occured while trying to find a linux/amd64 node: " + str(e)) + else: + logger.debug("Error occured while trying to find a linux/amd64 node: " + str(e)) + # utils.kubernetes_exception_handler(e, consts.Kubernetes_Node_Type_Fetch_Fault, 'Unable to find a linux/amd64 node', + # raise_error=False) + return False + + +def get_config_dp_endpoint(cmd, location): + cloud_based_domain = cmd.cli_ctx.cloud.endpoints.active_directory.split('.')[2] + config_dp_endpoint = "https://{}.dp.kubernetesconfiguration.azure.{}".format(location, cloud_based_domain) + return config_dp_endpoint \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py new file mode 100644 index 00000000000..821630f5f34 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py @@ -0,0 +1,20 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + + +def example_name_or_id_validator(cmd, namespace): + # Example of a storage account name or ID validator. + # See: https://github.com/Azure/azure-cli/blob/dev/doc/authoring_command_modules/authoring_commands.md#supporting-name-or-id-parameters + from azure.cli.core.commands.client_factory import get_subscription_id + from msrestazure.tools import is_valid_resource_id, resource_id + if namespace.storage_account: + if not is_valid_resource_id(namespace.RESOURCE): + namespace.storage_account = resource_id( + subscription=get_subscription_id(cmd.cli_ctx), + resource_group=namespace.resource_group_name, + namespace='Microsoft.Storage', + type='storageAccounts', + name=namespace.storage_account + ) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json b/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json new file mode 100644 index 00000000000..f3541f0d286 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json @@ -0,0 +1,3 @@ +{ + "azext.minCliCoreVersion": "2.16.0" +} \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py new file mode 100644 index 00000000000..e3a5448672c --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py @@ -0,0 +1,29 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +# pylint: disable=line-too-long +from azure.cli.core.commands import CliCommandType +from azext_k8s_troubleshoot._client_factory import cf_k8s_troubleshoot + + +def load_command_table(self, _): + + # TODO: Add command type here + # k8s-troubleshoot_sdk = CliCommandType( + # operations_tmpl='.operations#None.{}', + # client_factory=cf_k8s-troubleshoot) + + + with self.command_group('k8s-troubleshoot') as g: + g.custom_command('diagnose', 'diagnose_k8s_troubleshoot') + # g.command('delete', 'delete') + # g.custom_command('list', 'list_k8s_troubleshoot') + # g.show_command('show', 'get') + # g.generic_update_command('update', setter_name='update', custom_func_name='update_k8s-troubleshoot') + + + with self.command_group('k8s-troubleshoot', is_preview=True): + pass + diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py new file mode 100644 index 00000000000..419a5dac7ec --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -0,0 +1,90 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +import os +from knack.util import CLIError +from knack.log import get_logger +import logging +import version +from kubernetes import client as kube_client, config +import azext_k8s_troubleshoot._utils as utils + + +logger = get_logger(__name__) + + +def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=None, kube_context=None, location=None, storage_account=None, + sas_token=None, output_file=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz')): + troubleshoot_log_path = os.path.join(os.path.expanduser('~'), '.azure', 'connected8s_troubleshoot.log') + utils.setup_logger('connectedk8s_troubleshoot', troubleshoot_log_path) + tr_logger = logging.getLogger('connectedk8s_troubleshoot') + + kube_config = utils.set_kube_config(kube_config) + + # Loading the kubeconfig file in kubernetes client configuration + utils.load_kube_config(kube_config, kube_context) + configuration = kube_client.Configuration() + try: + latest_connectedk8s_version = utils.get_latest_extension_version() + local_connectedk8s_version = utils.get_existing_extension_version() + tr_logger.info("Latest available connectedk8s version: {}".format(latest_connectedk8s_version)) + tr_logger.info("Local connectedk8s version: {}".format(local_connectedk8s_version)) + if latest_connectedk8s_version and local_connectedk8s_version != 'Unknown' and local_connectedk8s_version != 'NotFound': + if version.parse(local_connectedk8s_version) < version.parse(latest_connectedk8s_version): + logger.warning("You have an update pending. You can update the connectedk8s extension to latest v{} using 'az extension update -n connectedk8s'".format(latest_connectedk8s_version)) + + permitted = utils.check_system_permissions(tr_logger) + if not permitted: + tr_logger.error("CLI doesn't have the permission/privilege to install azure arc charts at path {}".format(os.path.join(os.path.expanduser('~'), '.azure', 'AzureArcCharts'))) + required_node_exists = utils.check_linux_amd64_node(configuration, custom_logger=tr_logger) + if not required_node_exists: + tr_logger.warning("Couldn't find any linux/amd64 node on the Kubernetes cluster") + config_dp_endpoint = utils.get_config_dp_endpoint(cmd, location) + helm_registry_path = utils.get_helm_registry(cmd, config_dp_endpoint) + tr_logger.info("Helm Registry path : {}".format(helm_registry_path)) + utils.check_provider_registrations(cmd.cli_ctx, tr_logger) + os.environ['HELM_EXPERIMENTAL_OCI'] = '1' + utils.pull_helm_chart(helm_registry_path, kube_config, kube_context) + + try: + # Fetch ConnectedCluster + connected_cluster = client.get(resource_group_name, cluster_name, raw=True) + tr_logger.info("Connected cluster resource: {}".format(connected_cluster.response.content)) + except Exception as ex: + try: + if ex.error.error.code == "NotFound" or ex.error.error.code == "ResourceNotFound": + tr_logger.error("Connected cluster resource doesn't exist. " + str(ex)) + except AttributeError: + pass + tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex))) + + kapi_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration)) + try: + pod_list = kapi_instance.list_namespaced_pod('azure-arc') + pods_count = 0 + for pod in pod_list.items: + pods_count += 1 + if pod.status.phase != 'Running': + tr_logger.warning("Pod {} is in {} state. Reason: {}. Container statuses: {}".format(pod.metadata.name, pod.status.phase, pod.status.reason, pod.status.container_statuses)) + + if pods_count == 0: + tr_logger.warning("No pods found in azure-arc namespace.") + + except Exception as ex: + tr_logger.error("Error occured while fetching pod's statues : {}".format(str(ex))) + + try: + # Creating the .tar.gz for logs and deleting the actual log file + import tarfile + with tarfile.open(output_file, "w:gz") as tar: + tar.add(troubleshoot_log_path, 'connected8s_troubleshoot.log') + logging.shutdown() # To release log file handler, so that the actual log file can be removed after archiving + os.remove(troubleshoot_log_path) + except Exception as ex: + tr_logger.error("Error occured while archiving the log file: {}".format(str(ex)), exc_info=True) + + except Exception as ex: + tr_logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) + logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py new file mode 100644 index 00000000000..2dcf9bb68b3 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py @@ -0,0 +1,5 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# ----------------------------------------------------------------------------- \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py new file mode 100644 index 00000000000..2dcf9bb68b3 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py @@ -0,0 +1,5 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# ----------------------------------------------------------------------------- \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py new file mode 100644 index 00000000000..f866882e548 --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py @@ -0,0 +1,40 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +import os +import unittest + +from azure_devtools.scenario_tests import AllowLargeResponse +from azure.cli.testsdk import (ScenarioTest, ResourceGroupPreparer) + + +TEST_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), '..')) + + +class K8s-troubleshootScenarioTest(ScenarioTest): + + @ResourceGroupPreparer(name_prefix='cli_test_k8s-troubleshoot') + def test_k8s-troubleshoot(self, resource_group): + + self.kwargs.update({ + 'name': 'test1' + }) + + self.cmd('k8s-troubleshoot create -g {rg} -n {name} --tags foo=doo', checks=[ + self.check('tags.foo', 'doo'), + self.check('name', '{name}') + ]) + self.cmd('k8s-troubleshoot update -g {rg} -n {name} --tags foo=boo', checks=[ + self.check('tags.foo', 'boo') + ]) + count = len(self.cmd('k8s-troubleshoot list').get_output_in_json()) + self.cmd('k8s-troubleshoot show - {rg} -n {name}', checks=[ + self.check('name', '{name}'), + self.check('resourceGroup', '{rg}'), + self.check('tags.foo', 'boo') + ]) + self.cmd('k8s-troubleshoot delete -g {rg} -n {name}') + final_count = len(self.cmd('k8s-troubleshoot list').get_output_in_json()) + self.assertTrue(final_count, count - 1) \ No newline at end of file diff --git a/src/k8s-troubleshoot/setup.cfg b/src/k8s-troubleshoot/setup.cfg new file mode 100644 index 00000000000..3c6e79cf31d --- /dev/null +++ b/src/k8s-troubleshoot/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal=1 diff --git a/src/k8s-troubleshoot/setup.py b/src/k8s-troubleshoot/setup.py new file mode 100644 index 00000000000..2ec29e3adcc --- /dev/null +++ b/src/k8s-troubleshoot/setup.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + + +from codecs import open +from setuptools import setup, find_packages +try: + from azure_bdist_wheel import cmdclass +except ImportError: + from distutils import log as logger + logger.warn("Wheel is not available, disabling bdist_wheel hook") + +# TODO: Confirm this is the right version number you want and it matches your +# HISTORY.rst entry. +VERSION = '0.1.0' + +# The full list of classifiers is available at +# https://pypi.python.org/pypi?%3Aaction=list_classifiers +CLASSIFIERS = [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'License :: OSI Approved :: MIT License', +] + +# TODO: Add any additional SDK dependencies here +DEPENDENCIES = [ + 'azure-cli-core' +] + +with open('README.rst', 'r', encoding='utf-8') as f: + README = f.read() +with open('HISTORY.rst', 'r', encoding='utf-8') as f: + HISTORY = f.read() + +setup( + name='k8s-troubleshoot', + version=VERSION, + description='Microsoft Azure Command-Line Tools K8s-troubleshoot Extension', + # TODO: Update author and email, if applicable + author='Microsoft Corporation', + author_email='azpycli@microsoft.com', + # TODO: consider pointing directly to your source code instead of the generic repo + url='https://github.com/Azure/azure-cli-extensions', + long_description=README + '\n\n' + HISTORY, + license='MIT', + classifiers=CLASSIFIERS, + packages=find_packages(), + install_requires=DEPENDENCIES, + package_data={'azext_k8s-troubleshoot': ['azext_metadata.json']}, +) \ No newline at end of file From a14b2add076265ef83c61888414c7d2f9b65acb1 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 13:40:31 +0530 Subject: [PATCH 02/16] Adding tests --- .../azext_k8s_troubleshoot/__init__.py | 14 +-- .../azext_k8s_troubleshoot/_client_factory.py | 16 ++++ .../azext_k8s_troubleshoot/_help.py | 43 ++++----- .../azext_k8s_troubleshoot/_params.py | 5 +- .../azext_k8s_troubleshoot/_utils.py | 91 ++++++++++++++++--- .../azext_k8s_troubleshoot/commands.py | 19 +--- .../azext_k8s_troubleshoot/custom.py | 18 +++- .../latest/test_k8s-troubleshoot_scenario.py | 40 -------- src/k8s-troubleshoot/setup.py | 12 +-- 9 files changed, 144 insertions(+), 114 deletions(-) delete mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py index 61e44675a3d..1b08379a810 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py @@ -12,12 +12,12 @@ class K8s_troubleshootCommandsLoader(AzCommandsLoader): def __init__(self, cli_ctx=None): from azure.cli.core.commands import CliCommandType - from azext_k8s_troubleshoot._client_factory import cf_k8s_troubleshoot - k8s-troubleshoot_custom = CliCommandType( - operations_tmpl='azext_k8s-troubleshoot.custom#{}', - client_factory=cf_k8s-troubleshoot) - super(K8s-troubleshootCommandsLoader, self).__init__(cli_ctx=cli_ctx, - custom_command_type=k8s-troubleshoot_custom) + from azext_k8s_troubleshoot._client_factory import cf_connectedk8s + k8s_troubleshoot_custom = CliCommandType( + operations_tmpl='azext_k8s_troubleshoot.custom#{}', + client_factory=cf_connectedk8s) + super(K8s_troubleshootCommandsLoader, self).__init__(cli_ctx=cli_ctx, + custom_command_type=k8s_troubleshoot_custom) def load_command_table(self, args): from azext_k8s_troubleshoot.commands import load_command_table @@ -25,7 +25,7 @@ def load_command_table(self, args): return self.command_table def load_arguments(self, command): - from azext_k8s-troubleshoot._params import load_arguments + from azext_k8s_troubleshoot._params import load_arguments load_arguments(self, command) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py index 5d54bed4404..51f11f643df 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py @@ -5,6 +5,17 @@ from azure.cli.core.commands.client_factory import get_mgmt_service_client +from azure.cli.core.profiles import ResourceType +from azure.common.client_factory import get_client_from_cli_profile + + +def cf_connectedk8s(cli_ctx, *_): + from azure.mgmt.hybridkubernetes import ConnectedKubernetesClient + return get_mgmt_service_client(cli_ctx, ConnectedKubernetesClient) + + +def cf_connected_cluster(cli_ctx, _): + return cf_connectedk8s(cli_ctx).connected_cluster def _resource_client_factory(cli_ctx, subscription_id=None): @@ -18,3 +29,8 @@ def _resource_providers_client(cli_ctx): # Alternate: This should also work # subscription_id = get_subscription_id(cli_ctx) # return get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES, subscription_id=subscription_id).providers + + +def get_subscription_client(): + from azure.mgmt.resource import SubscriptionClient + return get_client_from_cli_profile(SubscriptionClient).subscriptions diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py index af7d1ea7365..9cb58304027 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py @@ -9,30 +9,25 @@ helps['k8s-troubleshoot'] = """ type: group - short-summary: Commands to manage K8s-troubleshoots. + short-summary: Commands to troubleshoot azure-arc connected kubernetes cluster. """ -helps['k8s-troubleshoot create'] = """ - type: command - short-summary: Create a K8s-troubleshoot. -""" - -helps['k8s-troubleshoot list'] = """ - type: command - short-summary: List K8s-troubleshoots. -""" - -# helps['k8s-troubleshoot delete'] = """ -# type: command -# short-summary: Delete a K8s-troubleshoot. -# """ - -# helps['k8s-troubleshoot show'] = """ -# type: command -# short-summary: Show details of a K8s-troubleshoot. -# """ -# helps['k8s-troubleshoot update'] = """ -# type: command -# short-summary: Update a K8s-troubleshoot. -# """ +helps['k8s-troubleshoot diagnose'] = """ + type: command + short-summary: Collects diagnose infomation and gets logs on the connected cluster. + parameters: + - name: --storage-account + type: string + short-summary: Name or ID of the storage account to save the diagnostic information. + - name: --sas-token + type: string + short-summary: The SAS token with writable permission for the storage account. + examples: + - name: using storage account name and a shared access signature token with write permission + text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account MyStorageAccount --sas-token "MySasToken" + - name: using the resource id of a storage account resource you own. + text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account "MyStoreageAccountResourceId" + - name: using the storagea account in diagnostics settings for your connected cluster. + text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster +""" \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py index c4ce0482c5e..cca6e9d2d2f 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py @@ -13,10 +13,9 @@ def load_arguments(self, _): with self.argument_context('k8s-troubleshoot diagnose') as c: c.argument('location', arg_type=get_location_type(self.cli_ctx), validator=get_default_location_from_resource_group) - c.argument('cluster_name', options_list=['--name', '-n'], id_part='name', help='The name of the connected cluster.') + c.argument('cluster_name', options_list=['--name', '-n'], help='The name of the connected cluster.') c.argument('kube_config', options_list=['--kube-config'], help='Path to the kube config file.') c.argument('kube_context', options_list=['--kube-context'], help='Kubconfig context from current machine.') c.argument('storage_account', options_list=['--storage-account'], help='Name or ID of the storage account to save the diagnostic information') c.argument('sas_token', options_list=['--sas-token'], help='The SAS token with writable permission for the storage account.') - c.argument('output_file', options_list=['--output-file'], type=file_type, default=os.path.join(os.path.expanduser('~'), '.azure', 'az_connectedk8s_troubleshoot_output.tar.gz'), help="Output zipped file path for the logs collected during troubleshoot.") - + c.argument('output_file', options_list=['--output-file'], type=file_type, default=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz'), help="Output zipped file path for the logs collected during troubleshoot.") diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index c5f45f62e5a..d2724d32cf1 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -5,12 +5,13 @@ from kubernetes import client as kube_client, config from azure.cli.core import telemetry -from azure.cli.core.azclierror import FileOperationError +from azure.cli.core.util import send_raw_request from knack.log import get_logger import os import logging import requests import json +from subprocess import Popen, PIPE from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry import azext_k8s_troubleshoot._constants as consts @@ -55,13 +56,11 @@ def set_kube_config(kube_config): return None -def load_kube_config(kube_config, kube_context): +def load_kube_config(kube_config, kube_context, custom_logger=None): try: config.load_kube_config(config_file=kube_config, context=kube_context) except Exception as e: - telemetry.set_exception(exception=e, fault_type=consts.Load_Kubeconfig_Fault_Type, - summary='Problem loading the kubeconfig file') - raise FileOperationError("Problem loading the kubeconfig file." + str(e)) + handle_logging_error(custom_logger, "Problem loading the kubeconfig file." + str(e)) def get_latest_extension_version(extension_name='connectedk8s'): @@ -179,12 +178,13 @@ def check_provider_registrations(cli_ctx, custom_logger): # Returns a list of kubernetes pod objects in a given namespace. Object description at: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodList.md def get_pod_list(api_instance, namespace, label_selector="", field_selector=""): try: - return api_instance.list_namespaced_pod(namespace, label_selector=label_selector, field_selector="") + return api_instance.list_namespaced_pod(namespace, label_selector=label_selector, field_selector=field_selector) except Exception as e: logger.debug("Error occurred when retrieving pod information: " + str(e)) def check_linux_amd64_node(configuration, custom_logger=None): + try_list_node_fix() api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration)) try: api_response = api_instance.list_node() @@ -194,10 +194,7 @@ def check_linux_amd64_node(configuration, custom_logger=None): if node_arch == "amd64" and node_os == "linux": return True except Exception as e: # pylint: disable=broad-except - if custom_logger: - custom_logger.error("Error occured while trying to find a linux/amd64 node: " + str(e)) - else: - logger.debug("Error occured while trying to find a linux/amd64 node: " + str(e)) + handle_logging_error(custom_logger, "Error occured while trying to find a linux/amd64 node: " + str(e)) # utils.kubernetes_exception_handler(e, consts.Kubernetes_Node_Type_Fetch_Fault, 'Unable to find a linux/amd64 node', # raise_error=False) return False @@ -206,4 +203,76 @@ def check_linux_amd64_node(configuration, custom_logger=None): def get_config_dp_endpoint(cmd, location): cloud_based_domain = cmd.cli_ctx.cloud.endpoints.active_directory.split('.')[2] config_dp_endpoint = "https://{}.dp.kubernetesconfiguration.azure.{}".format(location, cloud_based_domain) - return config_dp_endpoint \ No newline at end of file + return config_dp_endpoint + + +def get_helm_registry(cmd, config_dp_endpoint, custom_logger=None, dp_endpoint_dogfood=None, release_train_dogfood=None): + # Setting uri + get_chart_location_url = "{}/{}/GetLatestHelmPackagePath?api-version=2019-11-01-preview".format(config_dp_endpoint, 'azure-arc-k8sagents') + release_train = os.getenv('RELEASETRAIN') if os.getenv('RELEASETRAIN') else 'stable' + if dp_endpoint_dogfood: + get_chart_location_url = "{}/azure-arc-k8sagents/GetLatestHelmPackagePath?api-version=2019-11-01-preview".format(dp_endpoint_dogfood) + if release_train_dogfood: + release_train = release_train_dogfood + uri_parameters = ["releaseTrain={}".format(release_train)] + resource = cmd.cli_ctx.cloud.endpoints.active_directory_resource_id + + # Sending request + try: + r = send_raw_request(cmd.cli_ctx, 'post', get_chart_location_url, uri_parameters=uri_parameters, resource=resource) + except Exception as e: + handle_logging_error(custom_logger, "Error while fetching helm chart registry path: " + str(e)) + if r.content: + try: + return r.json().get('repositoryPath') + except Exception as e: + handle_logging_error(custom_logger, "Error while fetching helm chart registry path from JSON response: " + str(e)) + else: + handle_logging_error(custom_logger, "No content was found in helm registry path response.") + + +def pull_helm_chart(registry_path, kube_config, kube_context, custom_logger=None): + cmd_helm_chart_pull = ["helm", "chart", "pull", registry_path] + if kube_config: + cmd_helm_chart_pull.extend(["--kubeconfig", kube_config]) + if kube_context: + cmd_helm_chart_pull.extend(["--kube-context", kube_context]) + response_helm_chart_pull = Popen(cmd_helm_chart_pull, stdout=PIPE, stderr=PIPE) + _, error_helm_chart_pull = response_helm_chart_pull.communicate() + if response_helm_chart_pull.returncode != 0: + handle_logging_error(custom_logger, "Unable to pull helm chart from the registry '{}': ".format(registry_path) + error_helm_chart_pull.decode("ascii")) + + +def handle_logging_error(custom_logger, error_string): + if custom_logger: + custom_logger.error(error_string) + else: + logger.error(error_string) + + +def can_create_clusterrolebindings(configuration, custom_logger=None): + try: + api_instance = kube_client.AuthorizationV1Api(kube_client.ApiClient(configuration)) + access_review = kube_client.V1SelfSubjectAccessReview(spec={ + "resourceAttributes":{ + "verb":"create", + "resource":"clusterrolebindings", + "group": "rbac.authorization.k8s.io" + } + }) + response = api_instance.create_self_subject_access_review(access_review) + return response.status.allowed + except Exception as ex: + handle_logging_error(custom_logger, "Couldn't check for the permission to create clusterrolebindings on this k8s cluster. Error: {}".format(str(ex))) + + +def try_list_node_fix(): + try: + from kubernetes.client.models.v1_container_image import V1ContainerImage + + def names(self, names): + self._names = names + + V1ContainerImage.names = V1ContainerImage.names.setter(names) + except Exception as ex: + logger.debug("Error while trying to monkey patch the fix for list_node(): {}".format(str(ex))) \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py index e3a5448672c..18bbcb13034 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py @@ -5,25 +5,10 @@ # pylint: disable=line-too-long from azure.cli.core.commands import CliCommandType -from azext_k8s_troubleshoot._client_factory import cf_k8s_troubleshoot +from azext_k8s_troubleshoot._client_factory import cf_connectedk8s, cf_connected_cluster def load_command_table(self, _): - - # TODO: Add command type here - # k8s-troubleshoot_sdk = CliCommandType( - # operations_tmpl='.operations#None.{}', - # client_factory=cf_k8s-troubleshoot) - - - with self.command_group('k8s-troubleshoot') as g: + with self.command_group('k8s-troubleshoot', client_factory=cf_connected_cluster) as g: g.custom_command('diagnose', 'diagnose_k8s_troubleshoot') - # g.command('delete', 'delete') - # g.custom_command('list', 'list_k8s_troubleshoot') - # g.show_command('show', 'get') - # g.generic_update_command('update', setter_name='update', custom_func_name='update_k8s-troubleshoot') - - - with self.command_group('k8s-troubleshoot', is_preview=True): - pass diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index 419a5dac7ec..61b1c4bf22f 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -7,16 +7,18 @@ from knack.util import CLIError from knack.log import get_logger import logging -import version +from packaging import version from kubernetes import client as kube_client, config import azext_k8s_troubleshoot._utils as utils +import colorama # pylint: disable=import-error logger = get_logger(__name__) def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=None, kube_context=None, location=None, storage_account=None, - sas_token=None, output_file=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz')): + sas_token=None, output_file=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz')): + colorama.init() troubleshoot_log_path = os.path.join(os.path.expanduser('~'), '.azure', 'connected8s_troubleshoot.log') utils.setup_logger('connectedk8s_troubleshoot', troubleshoot_log_path) tr_logger = logging.getLogger('connectedk8s_troubleshoot') @@ -24,7 +26,7 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku kube_config = utils.set_kube_config(kube_config) # Loading the kubeconfig file in kubernetes client configuration - utils.load_kube_config(kube_config, kube_context) + utils.load_kube_config(kube_config, kube_context, custom_logger=tr_logger) configuration = kube_client.Configuration() try: latest_connectedk8s_version = utils.get_latest_extension_version() @@ -35,6 +37,10 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku if version.parse(local_connectedk8s_version) < version.parse(latest_connectedk8s_version): logger.warning("You have an update pending. You can update the connectedk8s extension to latest v{} using 'az extension update -n connectedk8s'".format(latest_connectedk8s_version)) + crb_permission = utils.can_create_clusterrolebindings(configuration, custom_logger=tr_logger) + if not crb_permission: + tr_logger.error("CLI logged in cred doesn't have permission to create clusterrolebindings on this kubernetes cluster.") + permitted = utils.check_system_permissions(tr_logger) if not permitted: tr_logger.error("CLI doesn't have the permission/privilege to install azure arc charts at path {}".format(os.path.join(os.path.expanduser('~'), '.azure', 'AzureArcCharts'))) @@ -42,11 +48,11 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku if not required_node_exists: tr_logger.warning("Couldn't find any linux/amd64 node on the Kubernetes cluster") config_dp_endpoint = utils.get_config_dp_endpoint(cmd, location) - helm_registry_path = utils.get_helm_registry(cmd, config_dp_endpoint) + helm_registry_path = utils.get_helm_registry(cmd, config_dp_endpoint, custom_logger=tr_logger) tr_logger.info("Helm Registry path : {}".format(helm_registry_path)) utils.check_provider_registrations(cmd.cli_ctx, tr_logger) os.environ['HELM_EXPERIMENTAL_OCI'] = '1' - utils.pull_helm_chart(helm_registry_path, kube_config, kube_context) + utils.pull_helm_chart(helm_registry_path, kube_config, kube_context, custom_logger=tr_logger) try: # Fetch ConnectedCluster @@ -85,6 +91,8 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku except Exception as ex: tr_logger.error("Error occured while archiving the log file: {}".format(str(ex)), exc_info=True) + print(f"{colorama.Style.BRIGHT}{colorama.Fore.GREEN}The diagnostic logs have been collected and archived at '{output_file}'.") + except Exception as ex: tr_logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py deleted file mode 100644 index f866882e548..00000000000 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s-troubleshoot_scenario.py +++ /dev/null @@ -1,40 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- - -import os -import unittest - -from azure_devtools.scenario_tests import AllowLargeResponse -from azure.cli.testsdk import (ScenarioTest, ResourceGroupPreparer) - - -TEST_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), '..')) - - -class K8s-troubleshootScenarioTest(ScenarioTest): - - @ResourceGroupPreparer(name_prefix='cli_test_k8s-troubleshoot') - def test_k8s-troubleshoot(self, resource_group): - - self.kwargs.update({ - 'name': 'test1' - }) - - self.cmd('k8s-troubleshoot create -g {rg} -n {name} --tags foo=doo', checks=[ - self.check('tags.foo', 'doo'), - self.check('name', '{name}') - ]) - self.cmd('k8s-troubleshoot update -g {rg} -n {name} --tags foo=boo', checks=[ - self.check('tags.foo', 'boo') - ]) - count = len(self.cmd('k8s-troubleshoot list').get_output_in_json()) - self.cmd('k8s-troubleshoot show - {rg} -n {name}', checks=[ - self.check('name', '{name}'), - self.check('resourceGroup', '{rg}'), - self.check('tags.foo', 'boo') - ]) - self.cmd('k8s-troubleshoot delete -g {rg} -n {name}') - final_count = len(self.cmd('k8s-troubleshoot list').get_output_in_json()) - self.assertTrue(final_count, count - 1) \ No newline at end of file diff --git a/src/k8s-troubleshoot/setup.py b/src/k8s-troubleshoot/setup.py index 2ec29e3adcc..afea16d77ec 100644 --- a/src/k8s-troubleshoot/setup.py +++ b/src/k8s-troubleshoot/setup.py @@ -32,9 +32,9 @@ 'License :: OSI Approved :: MIT License', ] -# TODO: Add any additional SDK dependencies here DEPENDENCIES = [ - 'azure-cli-core' + 'kubernetes==11.0.0', + 'azure-mgmt-hybridkubernetes' ] with open('README.rst', 'r', encoding='utf-8') as f: @@ -46,15 +46,13 @@ name='k8s-troubleshoot', version=VERSION, description='Microsoft Azure Command-Line Tools K8s-troubleshoot Extension', - # TODO: Update author and email, if applicable author='Microsoft Corporation', - author_email='azpycli@microsoft.com', - # TODO: consider pointing directly to your source code instead of the generic repo - url='https://github.com/Azure/azure-cli-extensions', + author_email='k8connect@microsoft.com', + url='https://github.com/Azure/azure-cli-extensions/tree/master/src/k8s-troubleshoot', long_description=README + '\n\n' + HISTORY, license='MIT', classifiers=CLASSIFIERS, packages=find_packages(), install_requires=DEPENDENCIES, - package_data={'azext_k8s-troubleshoot': ['azext_metadata.json']}, + package_data={'azext_k8s_troubleshoot': ['azext_metadata.json']}, ) \ No newline at end of file From 675ea86c8a3d2e70193e5c7ec94ed1dd1e32a6ee Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 13:43:19 +0530 Subject: [PATCH 03/16] Nit --- src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py index 9cb58304027..f4a9adc11be 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py @@ -28,6 +28,6 @@ text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account MyStorageAccount --sas-token "MySasToken" - name: using the resource id of a storage account resource you own. text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account "MyStoreageAccountResourceId" - - name: using the storagea account in diagnostics settings for your connected cluster. + - name: using the storage account in diagnostics settings for your connected cluster. text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster """ \ No newline at end of file From 81c24ffcf133759325b42309d7b39b73071ed1b3 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 13:46:32 +0530 Subject: [PATCH 04/16] fix --- .../latest/test_k8s_troubleshoot_scenario.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py new file mode 100644 index 00000000000..0f2f4f43ddf --- /dev/null +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py @@ -0,0 +1,20 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +import os +import unittest + +from azure_devtools.scenario_tests import AllowLargeResponse +from azure.cli.testsdk import (ScenarioTest, ResourceGroupPreparer) + + +TEST_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), '..')) + + +class K8s_troubleshootScenarioTest(ScenarioTest): + + @ResourceGroupPreparer(name_prefix='cli_test_k8s_troubleshoot') + def test_k8s_troubleshoot(self, resource_group): + pass \ No newline at end of file From 9d06f4b30efa306adfdb13726a92b648528de3a9 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 13:48:54 +0530 Subject: [PATCH 05/16] fix2 --- .../tests/latest/test_k8s_troubleshoot_scenario.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py index 0f2f4f43ddf..bb409fc07a3 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/test_k8s_troubleshoot_scenario.py @@ -7,14 +7,13 @@ import unittest from azure_devtools.scenario_tests import AllowLargeResponse -from azure.cli.testsdk import (ScenarioTest, ResourceGroupPreparer) +from azure.cli.testsdk import (LiveScenarioTest, ResourceGroupPreparer) TEST_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), '..')) -class K8s_troubleshootScenarioTest(ScenarioTest): +class K8s_troubleshootScenarioTest(LiveScenarioTest): - @ResourceGroupPreparer(name_prefix='cli_test_k8s_troubleshoot') def test_k8s_troubleshoot(self, resource_group): pass \ No newline at end of file From 2d13b5d7cbbd6fb62061ae8b505cf38ba4260a03 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 14:33:24 +0530 Subject: [PATCH 06/16] Create python-publish.yml --- .github/workflows/python-publish.yml | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 00000000000..3f007d265e9 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + workflow_dispatch: + # Trigger the workflow on push, + # but only for the main branch + push: + branches: + - master + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies and build + run: | + python -m pip install --upgrade pip + pip install setuptools wheel + cd src/k8s-troubleshoot + ls + printf "[bdist_wheel]\nuniversal=1\n" > setup.cfg + python setup.py sdist bdist_wheel + cd dist + ls + - name: publish .whl + uses: actions/upload-artifact@v2 + with: + name: k8s-troubleshoot-whl + path: src/k8s-troubleshoot/dist/k8s_troubleshoot-*.whl From 8ee8bc7bed4382c5a53b977089252811097fffd7 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 14:44:32 +0530 Subject: [PATCH 07/16] add version --- src/k8s-troubleshoot/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/k8s-troubleshoot/setup.py b/src/k8s-troubleshoot/setup.py index afea16d77ec..90b7a418f24 100644 --- a/src/k8s-troubleshoot/setup.py +++ b/src/k8s-troubleshoot/setup.py @@ -34,7 +34,8 @@ DEPENDENCIES = [ 'kubernetes==11.0.0', - 'azure-mgmt-hybridkubernetes' + 'azure-mgmt-hybridkubernetes', + 'packaging' ] with open('README.rst', 'r', encoding='utf-8') as f: From 6418110f2157fd7c8b29f99a8ba4b72e21cabc7e Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 27 Apr 2021 16:13:04 +0530 Subject: [PATCH 08/16] add version upd --- src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py | 2 +- src/k8s-troubleshoot/setup.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index 61b1c4bf22f..be3c5a55058 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -7,7 +7,7 @@ from knack.util import CLIError from knack.log import get_logger import logging -from packaging import version +from setuptools._vendor.packaging import version from kubernetes import client as kube_client, config import azext_k8s_troubleshoot._utils as utils import colorama # pylint: disable=import-error diff --git a/src/k8s-troubleshoot/setup.py b/src/k8s-troubleshoot/setup.py index 90b7a418f24..afea16d77ec 100644 --- a/src/k8s-troubleshoot/setup.py +++ b/src/k8s-troubleshoot/setup.py @@ -34,8 +34,7 @@ DEPENDENCIES = [ 'kubernetes==11.0.0', - 'azure-mgmt-hybridkubernetes', - 'packaging' + 'azure-mgmt-hybridkubernetes' ] with open('README.rst', 'r', encoding='utf-8') as f: From 6b9cdea5f7a946f9e3d2be3996e1ec5b490991cd Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Wed, 28 Apr 2021 12:53:45 +0530 Subject: [PATCH 09/16] Added cert logs --- .../azext_k8s_troubleshoot/_constants.py | 3 +++ .../azext_k8s_troubleshoot/_utils.py | 15 ++++++++++++++- .../azext_k8s_troubleshoot/custom.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py index dca9654731c..9bd4c6141e7 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py @@ -9,7 +9,10 @@ Connected_Cluster_Provider_Namespace = 'Microsoft.Kubernetes' Kubernetes_Configuration_Provider_Namespace = 'Microsoft.KubernetesConfiguration' Custom_Locations_Provider_Namespace = 'Microsoft.ExtendedLocation' +Arc_Namespace = 'azure-arc' DEFAULT_REQUEST_TIMEOUT = 10 # seconds +AZURE_IDENTITY_CERTIFICATE_SECRET = 'azure-identity-certificate' +ISO_861_Time_format = "%Y-%m-%dT%H:%M:%SZ" # Custom fault types diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index d2724d32cf1..04fa84f93b8 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -11,6 +11,7 @@ import logging import requests import json +import colorama # pylint: disable=import-error from subprocess import Popen, PIPE from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry @@ -275,4 +276,16 @@ def names(self, names): V1ContainerImage.names = V1ContainerImage.names.setter(names) except Exception as ex: - logger.debug("Error while trying to monkey patch the fix for list_node(): {}".format(str(ex))) \ No newline at end of file + logger.debug("Error while trying to monkey patch the fix for list_node(): {}".format(str(ex))) + + +def format_hyperlink(the_link): + # usage : f'{format_hyperlink("https://azure.microsoft.com/en-us/features/storage-explorer/")}' + return f'\033[1m{colorama.Style.BRIGHT}{colorama.Fore.BLUE}{the_link}{colorama.Style.RESET_ALL}' + + +def get_kubernetes_secret(api_instance, namespace, secret_name, custom_logger=None): + try: + return api_instance.read_namespaced_secret(secret_name, namespace) + except Exception as e: + handle_logging_error(custom_logger, "Error occurred when retrieving secret '{}': ".format(secret_name) + str(e)) \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index be3c5a55058..e261e0f3b5b 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -4,12 +4,15 @@ # -------------------------------------------------------------------------------------------- import os +from datetime import datetime, timezone from knack.util import CLIError from knack.log import get_logger import logging +import json from setuptools._vendor.packaging import version from kubernetes import client as kube_client, config import azext_k8s_troubleshoot._utils as utils +import azext_k8s_troubleshoot._constants as consts import colorama # pylint: disable=import-error @@ -81,6 +84,21 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku except Exception as ex: tr_logger.error("Error occured while fetching pod's statues : {}".format(str(ex))) + cert_secret = utils.get_kubernetes_secret(kapi_instance, consts.Arc_Namespace, consts.AZURE_IDENTITY_CERTIFICATE_SECRET, custom_logger=tr_logger) + if (not cert_secret) or (not hasattr(cert_secret, 'data')) or (consts.AZURE_IDENTITY_CERTIFICATE_SECRET not in cert_secret.data): + tr_logger.error("{} secret is not present on the kubernetes cluster".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) + logger.warning("{} secret is not present on the kubernetes cluster".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) + + try: + cc_object = json.loads(connected_cluster.response.content) + cert_expirn_time = datetime.strptime(cc_object.get("properties").get("managedIdentityCertificateExpirationTime"), consts.ISO_861_Time_format).replace(tzinfo=timezone.utc) + current_time = datetime.now(timezone.utc) + if cert_expirn_time != datetime.min and cert_expirn_time < current_time: + tr_logger.error("MSI certificate on the cluster has expired.") + logger.warning("MSI certificate on the cluster has expired.") + except Exception as ex: + tr_logger.error("Error occured while checking if the MSI certificate has expired: {}".format(str(ex)), exc_info=True) + try: # Creating the .tar.gz for logs and deleting the actual log file import tarfile From a4d957ff62e219d1c123929907aec0caf9c11d63 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Wed, 28 Apr 2021 15:52:12 +0530 Subject: [PATCH 10/16] Enhance pod condition log --- src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index e261e0f3b5b..cc2b0fa3044 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -76,7 +76,7 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku for pod in pod_list.items: pods_count += 1 if pod.status.phase != 'Running': - tr_logger.warning("Pod {} is in {} state. Reason: {}. Container statuses: {}".format(pod.metadata.name, pod.status.phase, pod.status.reason, pod.status.container_statuses)) + tr_logger.warning("Pod {} is in {} state. Reason: {}. Container statuses: {} .\n Current condition of this pod: {}\n".format(pod.metadata.name, pod.status.phase, pod.status.reason, pod.status.container_statuses, pod.status.conditions)) if pods_count == 0: tr_logger.warning("No pods found in azure-arc namespace.") @@ -86,8 +86,8 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku cert_secret = utils.get_kubernetes_secret(kapi_instance, consts.Arc_Namespace, consts.AZURE_IDENTITY_CERTIFICATE_SECRET, custom_logger=tr_logger) if (not cert_secret) or (not hasattr(cert_secret, 'data')) or (consts.AZURE_IDENTITY_CERTIFICATE_SECRET not in cert_secret.data): - tr_logger.error("{} secret is not present on the kubernetes cluster".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) - logger.warning("{} secret is not present on the kubernetes cluster".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) + tr_logger.error("{} secret is not present on the kubernetes cluster.".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) + logger.warning("{} secret is not present on the kubernetes cluster.".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET)) try: cc_object = json.loads(connected_cluster.response.content) From 59011f04e33c6610cfff28f1651434753cbfe872 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Thu, 29 Apr 2021 12:50:46 +0530 Subject: [PATCH 11/16] Enhancements --- src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py | 4 ++-- src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index 04fa84f93b8..d2cba9c90f7 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -61,7 +61,7 @@ def load_kube_config(kube_config, kube_context, custom_logger=None): try: config.load_kube_config(config_file=kube_config, context=kube_context) except Exception as e: - handle_logging_error(custom_logger, "Problem loading the kubeconfig file." + str(e)) + handle_logging_error(custom_logger, "Problem loading the kubeconfig file. " + str(e)) def get_latest_extension_version(extension_name='connectedk8s'): @@ -246,7 +246,7 @@ def pull_helm_chart(registry_path, kube_config, kube_context, custom_logger=None def handle_logging_error(custom_logger, error_string): if custom_logger: - custom_logger.error(error_string) + custom_logger.error(error_string, exc_info=True) else: logger.error(error_string) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index cc2b0fa3044..a5d04930967 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -14,6 +14,7 @@ import azext_k8s_troubleshoot._utils as utils import azext_k8s_troubleshoot._constants as consts import colorama # pylint: disable=import-error +from azure.cli.core.azclierror import CLIInternalError logger = get_logger(__name__) @@ -113,4 +114,4 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku except Exception as ex: tr_logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) - logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True) + raise CLIInternalError("Error occurred while troubleshooting: " + str(ex)) From cee92d303557ee3d88c10a08b53b1dfff99a027a Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 4 May 2021 17:36:26 +0530 Subject: [PATCH 12/16] Fix help --- src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py index f4a9adc11be..ceddeb1797e 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py @@ -25,9 +25,9 @@ short-summary: The SAS token with writable permission for the storage account. examples: - name: using storage account name and a shared access signature token with write permission - text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account MyStorageAccount --sas-token "MySasToken" + text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster --storage-account MyStorageAccount --sas-token "MySasToken" - name: using the resource id of a storage account resource you own. - text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster --storage-account "MyStoreageAccountResourceId" + text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster --storage-account "MyStoreageAccountResourceId" - name: using the storage account in diagnostics settings for your connected cluster. - text: az connectedk8s troubleshoot -g MyResourceGroup -n ConnectedCluster + text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster """ \ No newline at end of file From 9262e7f8d0cfa5ae64de5f0048459673f1692009 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Tue, 4 May 2021 19:33:25 +0530 Subject: [PATCH 13/16] add delete job stuck log --- .../azext_k8s_troubleshoot/_utils.py | 18 +++++++++++++++++- .../azext_k8s_troubleshoot/custom.py | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index d2cba9c90f7..05a0a813c3f 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -288,4 +288,20 @@ def get_kubernetes_secret(api_instance, namespace, secret_name, custom_logger=No try: return api_instance.read_namespaced_secret(secret_name, namespace) except Exception as e: - handle_logging_error(custom_logger, "Error occurred when retrieving secret '{}': ".format(secret_name) + str(e)) \ No newline at end of file + handle_logging_error(custom_logger, "Error occurred when retrieving secret '{}': ".format(secret_name) + str(e)) + + +def check_delete_job(configuration, namespace, custom_logger=None): + try: + api_instance = kube_client.BatchV1Api(kube_client.ApiClient(configuration)) + api_response = api_instance.list_namespaced_job(namespace) + if api_response.items: + annotations = list(api_response.items)[0].metadata.annotations + if annotations.get("helm.sh/hook") == "pre-delete": + delete_job = list(api_response.items)[0] + job_status = delete_job.status + job_conditions = job_status.conditions + if job_status.succeeded == 0 or job_status.active > 0: + custom_logger.info("Delete Job status conditions: {}".format(job_status.conditions)) + except Exception as e: + handle_logging_error(custom_logger, "Error occurred while retrieving status of the delete job: {}".format(str(e))) \ No newline at end of file diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index a5d04930967..410dbb9d02f 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -100,6 +100,8 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku except Exception as ex: tr_logger.error("Error occured while checking if the MSI certificate has expired: {}".format(str(ex)), exc_info=True) + utils.check_delete_job(configuration, 'azure-arc', custom_logger=tr_logger) + try: # Creating the .tar.gz for logs and deleting the actual log file import tarfile From 3fc63ffb4bfa043475c7303a09122a891af5c114 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Wed, 5 May 2021 10:56:43 +0530 Subject: [PATCH 14/16] fix --- src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index 05a0a813c3f..95126fb8f9d 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -295,13 +295,13 @@ def check_delete_job(configuration, namespace, custom_logger=None): try: api_instance = kube_client.BatchV1Api(kube_client.ApiClient(configuration)) api_response = api_instance.list_namespaced_job(namespace) - if api_response.items: - annotations = list(api_response.items)[0].metadata.annotations + for item in list(api_response.items): + annotations = item.metadata.annotations if annotations.get("helm.sh/hook") == "pre-delete": - delete_job = list(api_response.items)[0] - job_status = delete_job.status - job_conditions = job_status.conditions + job_status = item.status + job_conditions = item.conditions if job_status.succeeded == 0 or job_status.active > 0: custom_logger.info("Delete Job status conditions: {}".format(job_status.conditions)) + break except Exception as e: handle_logging_error(custom_logger, "Error occurred while retrieving status of the delete job: {}".format(str(e))) \ No newline at end of file From 489251cf969797325f7a66daadf3290bc6f64783 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Mon, 10 May 2021 13:17:09 +0530 Subject: [PATCH 15/16] Fixes and updates --- .../azext_k8s_troubleshoot/_utils.py | 6 ++---- .../azext_k8s_troubleshoot/custom.py | 21 +++---------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py index 95126fb8f9d..974abd90777 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py @@ -160,7 +160,7 @@ def check_system_permissions(custom_logger): return False except Exception as ex: custom_logger.debug("Couldn't check the system permissions for creating an azure arc charts directory. Error: {}".format(str(ex)), exc_info=True) - return False + return None def check_provider_registrations(cli_ctx, custom_logger): @@ -196,8 +196,6 @@ def check_linux_amd64_node(configuration, custom_logger=None): return True except Exception as e: # pylint: disable=broad-except handle_logging_error(custom_logger, "Error occured while trying to find a linux/amd64 node: " + str(e)) - # utils.kubernetes_exception_handler(e, consts.Kubernetes_Node_Type_Fetch_Fault, 'Unable to find a linux/amd64 node', - # raise_error=False) return False @@ -265,6 +263,7 @@ def can_create_clusterrolebindings(configuration, custom_logger=None): return response.status.allowed except Exception as ex: handle_logging_error(custom_logger, "Couldn't check for the permission to create clusterrolebindings on this k8s cluster. Error: {}".format(str(ex))) + return None def try_list_node_fix(): @@ -299,7 +298,6 @@ def check_delete_job(configuration, namespace, custom_logger=None): annotations = item.metadata.annotations if annotations.get("helm.sh/hook") == "pre-delete": job_status = item.status - job_conditions = item.conditions if job_status.succeeded == 0 or job_status.active > 0: custom_logger.info("Delete Job status conditions: {}".format(job_status.conditions)) break diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index 410dbb9d02f..b4e3ab869ef 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -39,24 +39,11 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku tr_logger.info("Local connectedk8s version: {}".format(local_connectedk8s_version)) if latest_connectedk8s_version and local_connectedk8s_version != 'Unknown' and local_connectedk8s_version != 'NotFound': if version.parse(local_connectedk8s_version) < version.parse(latest_connectedk8s_version): - logger.warning("You have an update pending. You can update the connectedk8s extension to latest v{} using 'az extension update -n connectedk8s'".format(latest_connectedk8s_version)) + print("You have an update pending. You can update the connectedk8s extension to latest v{} using 'az extension update -n connectedk8s'".format(latest_connectedk8s_version)) - crb_permission = utils.can_create_clusterrolebindings(configuration, custom_logger=tr_logger) + crb_permission = utils.can_create_clusterrolebindings(configuration, custom_logger=tr_logger) # To add in connectedk8s connect command itself if not crb_permission: - tr_logger.error("CLI logged in cred doesn't have permission to create clusterrolebindings on this kubernetes cluster.") - - permitted = utils.check_system_permissions(tr_logger) - if not permitted: - tr_logger.error("CLI doesn't have the permission/privilege to install azure arc charts at path {}".format(os.path.join(os.path.expanduser('~'), '.azure', 'AzureArcCharts'))) - required_node_exists = utils.check_linux_amd64_node(configuration, custom_logger=tr_logger) - if not required_node_exists: - tr_logger.warning("Couldn't find any linux/amd64 node on the Kubernetes cluster") - config_dp_endpoint = utils.get_config_dp_endpoint(cmd, location) - helm_registry_path = utils.get_helm_registry(cmd, config_dp_endpoint, custom_logger=tr_logger) - tr_logger.info("Helm Registry path : {}".format(helm_registry_path)) - utils.check_provider_registrations(cmd.cli_ctx, tr_logger) - os.environ['HELM_EXPERIMENTAL_OCI'] = '1' - utils.pull_helm_chart(helm_registry_path, kube_config, kube_context, custom_logger=tr_logger) + tr_logger.error("CLI logged-in credentials doesn't have permission to create clusterrolebindings on this kubernetes cluster.") try: # Fetch ConnectedCluster @@ -100,8 +87,6 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku except Exception as ex: tr_logger.error("Error occured while checking if the MSI certificate has expired: {}".format(str(ex)), exc_info=True) - utils.check_delete_job(configuration, 'azure-arc', custom_logger=tr_logger) - try: # Creating the .tar.gz for logs and deleting the actual log file import tarfile From e659f0ccf0b743c0e3c93197ff50bd10f95e3e86 Mon Sep 17 00:00:00 2001 From: Arpit Gupta Date: Mon, 10 May 2021 14:06:06 +0530 Subject: [PATCH 16/16] Fix --- src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py index b4e3ab869ef..4addaece3ea 100644 --- a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py +++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py @@ -53,9 +53,10 @@ def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, ku try: if ex.error.error.code == "NotFound" or ex.error.error.code == "ResourceNotFound": tr_logger.error("Connected cluster resource doesn't exist. " + str(ex)) + else: + tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex))) except AttributeError: - pass - tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex))) + tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex))) kapi_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration)) try: