diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 00000000000..3f007d265e9
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow builds the Python package and uploads the wheel as a build artifact
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  workflow_dispatch:
+  # Trigger the workflow on push,
+  # but only for the master branch
+  push:
+    branches:
+      - master
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies and build
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel
+        cd src/k8s-troubleshoot
+        ls
+        printf "[bdist_wheel]\nuniversal=1\n" > setup.cfg
+        python setup.py sdist bdist_wheel
+        cd dist
+        ls
+    - name: publish .whl
+      uses: actions/upload-artifact@v2
+      with:
+        name: k8s-troubleshoot-whl
+        path: src/k8s-troubleshoot/dist/k8s_troubleshoot-*.whl
diff --git a/src/k8s-troubleshoot/HISTORY.rst b/src/k8s-troubleshoot/HISTORY.rst
new file mode 100644
index 00000000000..8c34bccfff8
--- /dev/null
+++ b/src/k8s-troubleshoot/HISTORY.rst
@@ -0,0 +1,8 @@
+.. :changelog:
+
+Release History
+===============
+
+0.1.0
+++++++
+* Initial release.
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/README.rst b/src/k8s-troubleshoot/README.rst
new file mode 100644
index 00000000000..3f7ec4b808e
--- /dev/null
+++ b/src/k8s-troubleshoot/README.rst
@@ -0,0 +1,5 @@
+Microsoft Azure CLI 'k8s-troubleshoot' Extension
+================================================
+
+This package is for the 'k8s-troubleshoot' extension.
+i.e. 
'az k8s-troubleshoot'
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py
new file mode 100644
index 00000000000..1b08379a810
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/__init__.py
@@ -0,0 +1,36 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from azure.cli.core import AzCommandsLoader
+
+from azext_k8s_troubleshoot._help import helps  # pylint: disable=unused-import
+
+
+class K8s_troubleshootCommandsLoader(AzCommandsLoader):
+    """Commands loader that wires the 'k8s-troubleshoot' command table and arguments into the CLI."""
+
+    def __init__(self, cli_ctx=None):
+        """Create the loader with custom commands resolved from azext_k8s_troubleshoot.custom."""
+        from azure.cli.core.commands import CliCommandType
+        from azext_k8s_troubleshoot._client_factory import cf_connectedk8s
+        k8s_troubleshoot_custom = CliCommandType(
+            operations_tmpl='azext_k8s_troubleshoot.custom#{}',
+            client_factory=cf_connectedk8s)
+        super(K8s_troubleshootCommandsLoader, self).__init__(cli_ctx=cli_ctx,
+                                                             custom_command_type=k8s_troubleshoot_custom)
+
+    def load_command_table(self, args):
+        """Delegate to commands.load_command_table and return the populated command table."""
+        from azext_k8s_troubleshoot.commands import load_command_table
+        load_command_table(self, args)
+        return self.command_table
+
+    def load_arguments(self, command):
+        """Delegate argument registration to _params.load_arguments."""
+        from azext_k8s_troubleshoot._params import load_arguments
+        load_arguments(self, command)
+
+
+COMMAND_LOADER_CLS = K8s_troubleshootCommandsLoader
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py
new file mode 100644
index 00000000000..51f11f643df
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_client_factory.py
@@ -0,0 +1,41 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+
+from azure.cli.core.commands.client_factory import get_mgmt_service_client
+from azure.cli.core.profiles import ResourceType
+from azure.common.client_factory import get_client_from_cli_profile
+
+
+def cf_connectedk8s(cli_ctx, *_):
+    """Return a ConnectedKubernetesClient built from the CLI context; extra positional arguments are ignored."""
+    from azure.mgmt.hybridkubernetes import ConnectedKubernetesClient
+    return get_mgmt_service_client(cli_ctx, ConnectedKubernetesClient)
+
+
+def cf_connected_cluster(cli_ctx, _):
+    """Return the 'connected_cluster' operations group of the ConnectedKubernetesClient."""
+    return cf_connectedk8s(cli_ctx).connected_cluster
+
+
+def _resource_client_factory(cli_ctx, subscription_id=None):
+    """Return the MGMT_RESOURCE_RESOURCES management client, optionally bound to 'subscription_id'."""
+    return get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES, subscription_id=subscription_id)
+
+
+def _resource_providers_client(cli_ctx):
+    """Return the 'providers' operations group of a ResourceManagementClient."""
+    from azure.mgmt.resource import ResourceManagementClient
+    return get_mgmt_service_client(cli_ctx, ResourceManagementClient).providers
+
+    # Alternate: This should also work
+    # subscription_id = get_subscription_id(cli_ctx)
+    # return get_mgmt_service_client(cli_ctx, ResourceType.MGMT_RESOURCE_RESOURCES, subscription_id=subscription_id).providers
+
+
+def get_subscription_client():
+    """Return the 'subscriptions' operations group of a SubscriptionClient created from the CLI profile."""
+    from azure.mgmt.resource import SubscriptionClient
+    return get_client_from_cli_profile(SubscriptionClient).subscriptions
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py
new file mode 100644
index 00000000000..9bd4c6141e7
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_constants.py
@@ -0,0 +1,22 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+
+# pylint: disable=line-too-long
+
+Connected_Cluster_Provider_Namespace = 'Microsoft.Kubernetes'
+Kubernetes_Configuration_Provider_Namespace = 'Microsoft.KubernetesConfiguration'
+Custom_Locations_Provider_Namespace = 'Microsoft.ExtendedLocation'
+Arc_Namespace = 'azure-arc'
+DEFAULT_REQUEST_TIMEOUT = 10  # seconds
+AZURE_IDENTITY_CERTIFICATE_SECRET = 'azure-identity-certificate'
+ISO_861_Time_format = "%Y-%m-%dT%H:%M:%SZ"  # ISO 8601 timestamp with a literal 'Z' suffix (name spelling kept because callers reference it)
+
+# Custom fault types
+
+Load_Kubeconfig_Fault_Type = "Error while loading kubeconfig"
+
+# URL constants
+Kubernetes_Github_Latest_Release_Uri = "https://api.github.com/repos/kubernetes/kubernetes/releases/latest"
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py
new file mode 100644
index 00000000000..ceddeb1797e
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_help.py
@@ -0,0 +1,33 @@
+# coding=utf-8
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from knack.help_files import helps  # pylint: disable=unused-import
+
+
+helps['k8s-troubleshoot'] = """
+    type: group
+    short-summary: Commands to troubleshoot azure-arc connected kubernetes cluster.
+"""
+
+
+helps['k8s-troubleshoot diagnose'] = """
+    type: command
+    short-summary: Collects diagnostic information and gets logs on the connected cluster.
+    parameters:
+      - name: --storage-account
+        type: string
+        short-summary: Name or ID of the storage account to save the diagnostic information.
+      - name: --sas-token
+        type: string
+        short-summary: The SAS token with writable permission for the storage account.
+    examples:
+      - name: using storage account name and a shared access signature token with write permission
+        text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster --storage-account MyStorageAccount --sas-token "MySasToken"
+      - name: using the resource id of a storage account resource you own.
+        text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster --storage-account "MyStorageAccountResourceId"
+      - name: using the storage account in diagnostics settings for your connected cluster.
+        text: az k8s-troubleshoot diagnose -g MyResourceGroup -n ConnectedCluster
+"""
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py
new file mode 100644
index 00000000000..cca6e9d2d2f
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_params.py
@@ -0,0 +1,21 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+# pylint: disable=line-too-long
+
+import os.path
+from azure.cli.core.commands.parameters import get_location_type, file_type
+from azure.cli.core.commands.validators import get_default_location_from_resource_group
+
+
+def load_arguments(self, _):
+    """Register the arguments of the 'k8s-troubleshoot diagnose' command."""
+    with self.argument_context('k8s-troubleshoot diagnose') as c:
+        c.argument('location', arg_type=get_location_type(self.cli_ctx), validator=get_default_location_from_resource_group)
+        c.argument('cluster_name', options_list=['--name', '-n'], help='The name of the connected cluster.')
+        c.argument('kube_config', options_list=['--kube-config'], help='Path to the kube config file.')
+        c.argument('kube_context', options_list=['--kube-context'], help='Kubeconfig context from current machine.')
+        c.argument('storage_account', options_list=['--storage-account'], help='Name or ID of the storage account to save the diagnostic information')
+        c.argument('sas_token', options_list=['--sas-token'], help='The SAS token with writable permission for the storage account.')
+        c.argument('output_file', options_list=['--output-file'], type=file_type, default=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz'), help="Output zipped file path for the logs collected during troubleshoot.")
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py
new file mode 100644
index 00000000000..974abd90777
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_utils.py
@@ -0,0 +1,331 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from kubernetes import client as kube_client, config
+from azure.cli.core import telemetry
+from azure.cli.core.util import send_raw_request
+from knack.log import get_logger
+import os
+import logging
+import requests
+import json
+import colorama  # pylint: disable=import-error
+from subprocess import Popen, PIPE
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+import azext_k8s_troubleshoot._constants as consts
+from azext_k8s_troubleshoot._client_factory import get_subscription_client, _resource_providers_client
+
+logger = get_logger(__name__)
+
+
+class TimeoutHTTPAdapter(HTTPAdapter):
+    """HTTPAdapter that applies a default timeout to requests sent without one."""
+    def __init__(self, *args, **kwargs):
+        """Accept an optional 'timeout' keyword; fall back to consts.DEFAULT_REQUEST_TIMEOUT."""
+        self.timeout = consts.DEFAULT_REQUEST_TIMEOUT
+        if "timeout" in kwargs:
+            self.timeout = kwargs["timeout"]
+            del kwargs["timeout"]
+        super().__init__(*args, **kwargs)
+
+    def send(self, request, **kwargs):
+        """Send 'request', filling in the adapter's timeout when the caller gave none."""
+        timeout = kwargs.get("timeout")
+        if timeout is None:
+            kwargs["timeout"] = self.timeout
+        return super().send(request, **kwargs)
+
+
+def setup_logger(logger_name, log_file, level=logging.DEBUG):
+    """Attach a file handler writing to 'log_file' (opened in 'w' mode) to the named logger and set its level."""
+    loggr = logging.getLogger(logger_name)
+    formatter = logging.Formatter('%(asctime)s : %(levelname)s : %(message)s')
+    fileHandler = logging.FileHandler(log_file, mode='w')
+    fileHandler.setFormatter(formatter)
+
+    loggr.setLevel(level)
+    loggr.addHandler(fileHandler)
+
+
+def set_kube_config(kube_config):
+    """Return 'kube_config' with one leading and one trailing quote character removed, or None if it is empty."""
+    if kube_config:
+        # Trim kubeconfig. This is required for windows os.
+        if (kube_config.startswith("'") or kube_config.startswith('"')):
+            kube_config = kube_config[1:]
+        if (kube_config.endswith("'") or kube_config.endswith('"')):
+            kube_config = kube_config[:-1]
+        return kube_config
+    return None
+
+
+def load_kube_config(kube_config, kube_context, custom_logger=None):
+    """Load the kubeconfig into the kubernetes client configuration, logging any failure instead of raising."""
+    try:
+        config.load_kube_config(config_file=kube_config, context=kube_context)
+    except Exception as e:
+        handle_logging_error(custom_logger, "Problem loading the kubeconfig file. " + str(e))
+
+
+def get_latest_extension_version(extension_name='connectedk8s'):
+    """Return the VERSION string from the extension's setup.py on the azure-cli-extensions master branch, or None on failure."""
+    try:
+        import re
+        git_url = "https://raw.githubusercontent.com/Azure/azure-cli-extensions/master/src/{}/setup.py".format(extension_name)
+        response = requests.get(git_url, timeout=10)
+        if response.status_code != 200:
+            logger.info("Failed to fetch the latest version from '%s' with status code '%s' and reason '%s'",
+                        git_url, response.status_code, response.reason)
+            return None
+        for line in response.iter_lines():
+            txt = line.decode('utf-8', errors='ignore')
+            if txt.startswith('VERSION'):
+                match = re.search(r'VERSION = \'(.*)\'$', txt)
+                if match:
+                    return match.group(1)
+                else:
+                    match = re.search(r'VERSION = \"(.*)\"$', txt)
+                    if match:
+                        return match.group(1)
+        return None
+    except Exception as ex:  # pylint: disable=broad-except
+        logger.info("Failed to get the latest version from '%s'. %s", git_url, str(ex))
+        return None
+
+
+def get_existing_extension_version(extension_name='connectedk8s'):
+    """Return the installed version of the extension, 'Unknown' if it has no version, or 'NotFound' if it is not installed."""
+    from azure.cli.core.extension import get_extensions
+    extensions = get_extensions()
+    if extensions:
+        for ext in extensions:
+            if ext.name == extension_name:
+                return ext.version or 'Unknown'
+
+    return 'NotFound'
+
+
+def check_connectivity(url='https://example.org', max_retries=5, timeout=1):
+    """Send a HEAD request to 'url'; return True on success and False on a connection error or timeout."""
+    import timeit
+    start = timeit.default_timer()
+    success = None
+    try:
+        with requests.Session() as s:
+            s.mount(url, requests.adapters.HTTPAdapter(max_retries=max_retries))
+            s.head(url, timeout=timeout)
+            success = True
+    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as ex:
+        logger.info('Connectivity problem detected.')
+        logger.debug(ex)
+        success = False
+    stop = timeit.default_timer()
+    logger.debug('Connectivity check: %s sec', stop - start)
+    return success
+
+
+def get_latest_kubernetes_version():
+    """Return the tag name of the latest Kubernetes GitHub release, or None if it cannot be fetched."""
+    retries = Retry(total=3, backoff_factor=1, status_forcelist=[413, 429, 500, 502, 503, 504])
+    req_session = requests.Session()
+    adapter = TimeoutHTTPAdapter(max_retries=retries)
+    req_session.mount("https://", adapter)
+    req_session.mount("http://", adapter)
+
+    url = consts.Kubernetes_Github_Latest_Release_Uri
+
+    payload = {}
+    headers = {'Accept': 'application/vnd.github.v3+json'}
+    try:
+        response = req_session.request("GET", url, headers=headers, data=payload)
+        if response.status_code == 200:
+            latest_release = json.loads(response.text)
+            return latest_release["tag_name"]
+        else:
+            logger.warning("Couldn't fetch the latest kubernetes stable release information. Response status code: {}".format(response.status_code))
+    except Exception as e:
+        logger.warning("Couldn't fetch the latest kubernetes stable release information. Error: " + str(e))
+
+    return None
+
+
+def validate_azure_management_reachability(subscription_id, custom_logger):
+    """Log a warning on 'custom_logger' if the subscription cannot be fetched through the subscription client."""
+    try:
+        get_subscription_client().get(subscription_id)
+    except Exception as ex:
+        custom_logger.warning("Not able to reach azure management endpoints. Exception: " + str(ex))
+
+
+def check_system_permissions(custom_logger):
+    """Return True if a temporary directory can be created under ~/.azure/AzureArcCharts, False on OSError, None on other errors."""
+    try:
+        import tempfile
+        chart_export_path = os.path.join(os.path.expanduser('~'), '.azure', 'AzureArcCharts')
+        os.makedirs(chart_export_path, exist_ok=True)
+        with tempfile.TemporaryDirectory(dir=chart_export_path):
+            return True
+    except (OSError, EnvironmentError):
+        return False
+    except Exception as ex:
+        custom_logger.debug("Couldn't check the system permissions for creating an azure arc charts directory. Error: {}".format(str(ex)), exc_info=True)
+    return None
+
+
+def check_provider_registrations(cli_ctx, custom_logger):
+    """Log an error for each required resource provider that is not in the 'Registered' state."""
+    try:
+        rp_client = _resource_providers_client(cli_ctx)
+        cc_registration_state = rp_client.get(consts.Connected_Cluster_Provider_Namespace).registration_state
+        if cc_registration_state != "Registered":
+            custom_logger.error("{} provider is not registered".format(consts.Connected_Cluster_Provider_Namespace))
+        kc_registration_state = rp_client.get(consts.Kubernetes_Configuration_Provider_Namespace).registration_state
+        if kc_registration_state != "Registered":
+            custom_logger.error("{} provider is not registered".format(consts.Kubernetes_Configuration_Provider_Namespace))
+    except Exception as ex:
+        custom_logger.debug("Couldn't check the required provider's registration status. Error: {}".format(str(ex)), exc_info=True)
+
+
+# Returns a list of kubernetes pod objects in a given namespace. Object description at: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodList.md
+def get_pod_list(api_instance, namespace, label_selector="", field_selector=""):
+    try:
+        return api_instance.list_namespaced_pod(namespace, label_selector=label_selector, field_selector=field_selector)
+    except Exception as e:
+        logger.debug("Error occurred when retrieving pod information: " + str(e))
+
+
+def check_linux_amd64_node(configuration, custom_logger=None):
+    """Return True if at least one node carries the linux/amd64 os and arch labels, otherwise False."""
+    try_list_node_fix()
+    api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration))
+    try:
+        api_response = api_instance.list_node()
+        for item in api_response.items:
+            node_arch = item.metadata.labels.get("kubernetes.io/arch")
+            node_os = item.metadata.labels.get("kubernetes.io/os")
+            if node_arch == "amd64" and node_os == "linux":
+                return True
+    except Exception as e:  # pylint: disable=broad-except
+        handle_logging_error(custom_logger, "Error occured while trying to find a linux/amd64 node: " + str(e))
+    return False
+
+
+def get_config_dp_endpoint(cmd, location):
+    """Build the kubernetesconfiguration data-plane URL for 'location' using the active cloud's domain suffix."""
+    cloud_based_domain = cmd.cli_ctx.cloud.endpoints.active_directory.split('.')[2]
+    config_dp_endpoint = "https://{}.dp.kubernetesconfiguration.azure.{}".format(location, cloud_based_domain)
+    return config_dp_endpoint
+
+
+def get_helm_registry(cmd, config_dp_endpoint, custom_logger=None, dp_endpoint_dogfood=None, release_train_dogfood=None):
+    """Return the 'repositoryPath' of the latest azure-arc-k8sagents helm package, or None if it cannot be fetched."""
+    # Setting uri
+    get_chart_location_url = "{}/{}/GetLatestHelmPackagePath?api-version=2019-11-01-preview".format(config_dp_endpoint, 'azure-arc-k8sagents')
+    release_train = os.getenv('RELEASETRAIN') if os.getenv('RELEASETRAIN') else 'stable'
+    if dp_endpoint_dogfood:
+        get_chart_location_url = "{}/azure-arc-k8sagents/GetLatestHelmPackagePath?api-version=2019-11-01-preview".format(dp_endpoint_dogfood)
+        if release_train_dogfood:
+            release_train = release_train_dogfood
+    uri_parameters = ["releaseTrain={}".format(release_train)]
+    resource = cmd.cli_ctx.cloud.endpoints.active_directory_resource_id
+
+    # Sending request
+    try:
+        r = send_raw_request(cmd.cli_ctx, 'post', get_chart_location_url, uri_parameters=uri_parameters, resource=resource)
+    except Exception as e:
+        handle_logging_error(custom_logger, "Error while fetching helm chart registry path: " + str(e))
+        # There is no response to inspect, so stop here instead of reading the unbound 'r' below.
+        return None
+    if r.content:
+        try:
+            return r.json().get('repositoryPath')
+        except Exception as e:
+            handle_logging_error(custom_logger, "Error while fetching helm chart registry path from JSON response: " + str(e))
+    else:
+        handle_logging_error(custom_logger, "No content was found in helm registry path response.")
+
+
+def pull_helm_chart(registry_path, kube_config, kube_context, custom_logger=None):
+    """Run 'helm chart pull' for 'registry_path' and log an error if the command exits with a non-zero code."""
+    cmd_helm_chart_pull = ["helm", "chart", "pull", registry_path]
+    if kube_config:
+        cmd_helm_chart_pull.extend(["--kubeconfig", kube_config])
+    if kube_context:
+        cmd_helm_chart_pull.extend(["--kube-context", kube_context])
+    response_helm_chart_pull = Popen(cmd_helm_chart_pull, stdout=PIPE, stderr=PIPE)
+    _, error_helm_chart_pull = response_helm_chart_pull.communicate()
+    if response_helm_chart_pull.returncode != 0:
+        handle_logging_error(custom_logger, "Unable to pull helm chart from the registry '{}': ".format(registry_path) + error_helm_chart_pull.decode("utf-8", errors="replace"))
+
+
+def handle_logging_error(custom_logger, error_string):
+    """Log 'error_string' as an error on 'custom_logger' (with exception info) or, without one, on the module logger."""
+    if custom_logger:
+        custom_logger.error(error_string, exc_info=True)
+    else:
+        logger.error(error_string)
+
+
+def can_create_clusterrolebindings(configuration, custom_logger=None):
+    """Return whether the current user may create clusterrolebindings, or None if the access review fails."""
+    try:
+        api_instance = kube_client.AuthorizationV1Api(kube_client.ApiClient(configuration))
+        access_review = kube_client.V1SelfSubjectAccessReview(spec={
+            "resourceAttributes":{
+                "verb":"create",
+                "resource":"clusterrolebindings",
+                "group": "rbac.authorization.k8s.io"
+            }
+        })
+        response = api_instance.create_self_subject_access_review(access_review)
+        return response.status.allowed
+    except Exception as ex:
+        handle_logging_error(custom_logger, "Couldn't check for the permission to create clusterrolebindings on this k8s cluster. Error: {}".format(str(ex)))
+        return None
+
+
+def try_list_node_fix():
+    """Monkey patch the V1ContainerImage.names setter with a plain assignment as a workaround for list_node()."""
+    try:
+        from kubernetes.client.models.v1_container_image import V1ContainerImage
+
+        def names(self, names):
+            self._names = names
+
+        V1ContainerImage.names = V1ContainerImage.names.setter(names)
+    except Exception as ex:
+        logger.debug("Error while trying to monkey patch the fix for list_node(): {}".format(str(ex)))
+
+
+def format_hyperlink(the_link):
+    """Return 'the_link' wrapped in bold, bright blue terminal styling followed by a style reset."""
+    # usage : f'{format_hyperlink("https://azure.microsoft.com/en-us/features/storage-explorer/")}'
+    return f'\033[1m{colorama.Style.BRIGHT}{colorama.Fore.BLUE}{the_link}{colorama.Style.RESET_ALL}'
+
+
+def get_kubernetes_secret(api_instance, namespace, secret_name, custom_logger=None):
+    """Return the named secret from 'namespace', or None (after logging the error) if it cannot be read."""
+    try:
+        return api_instance.read_namespaced_secret(secret_name, namespace)
+    except Exception as e:
+        handle_logging_error(custom_logger, "Error occurred when retrieving secret '{}': ".format(secret_name) + str(e))
+
+
+def check_delete_job(configuration, namespace, custom_logger=None):
+    """Log the status conditions of the helm pre-delete hook job in 'namespace' if it has not succeeded or is still active."""
+    try:
+        api_instance = kube_client.BatchV1Api(kube_client.ApiClient(configuration))
+        api_response = api_instance.list_namespaced_job(namespace)
+        for item in list(api_response.items):
+            annotations = item.metadata.annotations or {}
+            if annotations.get("helm.sh/hook") == "pre-delete":
+                job_status = item.status
+                # NOTE(review): the counters appear to be None rather than 0 when unset; treat None as zero - confirm against the client docs.
+                if not job_status.succeeded or (job_status.active or 0) > 0:
+                    (custom_logger or logger).info("Delete Job status conditions: {}".format(job_status.conditions))
+                    break
+    except Exception as e:
+        handle_logging_error(custom_logger, "Error occurred while retrieving status of the delete job: {}".format(str(e)))
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py
new file mode 100644
index 00000000000..821630f5f34
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/_validators.py
@@ -0,0 
+1,20 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+
+def example_name_or_id_validator(cmd, namespace):
+    """Expand a bare storage account name in namespace.storage_account into a full resource ID; IDs are left as-is."""
+    # See: https://github.com/Azure/azure-cli/blob/dev/doc/authoring_command_modules/authoring_commands.md#supporting-name-or-id-parameters
+    from azure.cli.core.commands.client_factory import get_subscription_id
+    from msrestazure.tools import is_valid_resource_id, resource_id
+    if namespace.storage_account:
+        if not is_valid_resource_id(namespace.storage_account):
+            namespace.storage_account = resource_id(
+                subscription=get_subscription_id(cmd.cli_ctx),
+                resource_group=namespace.resource_group_name,
+                namespace='Microsoft.Storage',
+                type='storageAccounts',
+                name=namespace.storage_account
+            )
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json b/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json
new file mode 100644
index 00000000000..f3541f0d286
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/azext_metadata.json
@@ -0,0 +1,3 @@
+{
+    "azext.minCliCoreVersion": "2.16.0"
+}
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py
new file mode 100644
index 00000000000..18bbcb13034
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/commands.py
@@ -0,0 +1,14 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+# pylint: disable=line-too-long
+from azure.cli.core.commands import CliCommandType
+from azext_k8s_troubleshoot._client_factory import cf_connectedk8s, cf_connected_cluster
+
+
+def load_command_table(self, _):
+    """Register the 'k8s-troubleshoot diagnose' custom command with the connected-cluster client factory."""
+    with self.command_group('k8s-troubleshoot', client_factory=cf_connected_cluster) as g:
+        g.custom_command('diagnose', 'diagnose_k8s_troubleshoot')
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py
new file mode 100644
index 00000000000..4addaece3ea
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/custom.py
@@ -0,0 +1,121 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import os
+from datetime import datetime, timezone
+from knack.util import CLIError
+from knack.log import get_logger
+import logging
+import json
+from setuptools._vendor.packaging import version
+from kubernetes import client as kube_client, config
+import azext_k8s_troubleshoot._utils as utils
+import azext_k8s_troubleshoot._constants as consts
+import colorama  # pylint: disable=import-error
+from azure.cli.core.azclierror import CLIInternalError
+
+
+logger = get_logger(__name__)
+
+
+def diagnose_k8s_troubleshoot(cmd, client, resource_group_name, cluster_name, kube_config=None, kube_context=None, location=None, storage_account=None,
+                              sas_token=None, output_file=os.path.join(os.path.expanduser('~'), '.azure', 'az_k8s_troubleshoot_output.tar.gz')):
+    """Run the connected-cluster diagnostics and archive the resulting log file.
+
+    The checks cover the installed connectedk8s extension version, the permission to create
+    clusterrolebindings, the connected cluster resource, the pods in the azure-arc namespace,
+    the azure-identity-certificate secret and the managed identity certificate expiry. Findings are
+    written to a log file under ~/.azure, which is then packed into 'output_file' and removed.
+
+    'cmd', 'location', 'storage_account' and 'sas_token' are accepted but not used by this implementation.
+
+    Raises CLIInternalError if an unexpected error escapes the individual checks.
+    """
+    colorama.init()
+    troubleshoot_log_path = os.path.join(os.path.expanduser('~'), '.azure', 'connected8s_troubleshoot.log')
+    utils.setup_logger('connectedk8s_troubleshoot', troubleshoot_log_path)
+    tr_logger = logging.getLogger('connectedk8s_troubleshoot')
+
+    kube_config = utils.set_kube_config(kube_config)
+
+    # Loading the kubeconfig file in kubernetes client configuration
+    utils.load_kube_config(kube_config, kube_context, custom_logger=tr_logger)
+    configuration = kube_client.Configuration()
+    try:
+        latest_connectedk8s_version = utils.get_latest_extension_version()
+        local_connectedk8s_version = utils.get_existing_extension_version()
+        tr_logger.info("Latest available connectedk8s version: {}".format(latest_connectedk8s_version))
+        tr_logger.info("Local connectedk8s version: {}".format(local_connectedk8s_version))
+        if latest_connectedk8s_version and local_connectedk8s_version != 'Unknown' and local_connectedk8s_version != 'NotFound':
+            if version.parse(local_connectedk8s_version) < version.parse(latest_connectedk8s_version):
+                print("You have an update pending. You can update the connectedk8s extension to latest v{} using 'az extension update -n connectedk8s'".format(latest_connectedk8s_version))
+
+        crb_permission = utils.can_create_clusterrolebindings(configuration, custom_logger=tr_logger)  # To add in connectedk8s connect command itself
+        if crb_permission is False:  # None means the check itself failed, which the helper has already logged
+            tr_logger.error("CLI logged-in credentials doesn't have permission to create clusterrolebindings on this kubernetes cluster.")
+
+        connected_cluster = None  # stays None when the fetch below fails, so later checks can tell
+        try:
+            # Fetch ConnectedCluster
+            connected_cluster = client.get(resource_group_name, cluster_name, raw=True)
+            tr_logger.info("Connected cluster resource: {}".format(connected_cluster.response.content))
+        except Exception as ex:
+            try:
+                if ex.error.error.code == "NotFound" or ex.error.error.code == "ResourceNotFound":
+                    tr_logger.error("Connected cluster resource doesn't exist. " + str(ex))
+                else:
+                    tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex)))
+            except AttributeError:
+                tr_logger.error("Couldn't check the existence of Connected cluster resource. Error: {}".format(str(ex)))
+
+        kapi_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration))
+        try:
+            pod_list = kapi_instance.list_namespaced_pod('azure-arc')
+            pods_count = 0
+            for pod in pod_list.items:
+                pods_count += 1
+                if pod.status.phase != 'Running':
+                    tr_logger.warning("Pod {} is in {} state. Reason: {}. Container statuses: {} .\n Current condition of this pod: {}\n".format(pod.metadata.name, pod.status.phase, pod.status.reason, pod.status.container_statuses, pod.status.conditions))
+
+            if pods_count == 0:
+                tr_logger.warning("No pods found in azure-arc namespace.")
+
+        except Exception as ex:
+            tr_logger.error("Error occured while fetching pod's statues : {}".format(str(ex)))
+
+        cert_secret = utils.get_kubernetes_secret(kapi_instance, consts.Arc_Namespace, consts.AZURE_IDENTITY_CERTIFICATE_SECRET, custom_logger=tr_logger)
+        if (not cert_secret) or (not hasattr(cert_secret, 'data')) or (consts.AZURE_IDENTITY_CERTIFICATE_SECRET not in cert_secret.data):
+            tr_logger.error("{} secret is not present on the kubernetes cluster.".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET))
+            logger.warning("{} secret is not present on the kubernetes cluster.".format(consts.AZURE_IDENTITY_CERTIFICATE_SECRET))
+
+        if connected_cluster is None:
+            tr_logger.warning("Skipping the MSI certificate expiry check because the connected cluster resource couldn't be fetched.")
+        else:
+            try:
+                cc_object = json.loads(connected_cluster.response.content)
+                cert_expirn_time = datetime.strptime(cc_object.get("properties").get("managedIdentityCertificateExpirationTime"), consts.ISO_861_Time_format).replace(tzinfo=timezone.utc)
+                current_time = datetime.now(timezone.utc)
+                if cert_expirn_time != datetime.min.replace(tzinfo=timezone.utc) and cert_expirn_time < current_time:
+                    tr_logger.error("MSI certificate on the cluster has expired.")
+                    logger.warning("MSI certificate on the cluster has expired.")
+            except Exception as ex:
+                tr_logger.error("Error occured while checking if the MSI certificate has expired: {}".format(str(ex)), exc_info=True)
+
+        try:
+            # Creating the .tar.gz for logs and deleting the actual log file
+            import tarfile
+            with tarfile.open(output_file, "w:gz") as tar:
+                tar.add(troubleshoot_log_path, 'connected8s_troubleshoot.log')
+            logging.shutdown()  # To release log file handler, so that the actual log file can be removed after archiving
+            os.remove(troubleshoot_log_path)
+        except Exception as ex:
+            tr_logger.error("Error occured while archiving the log file: {}".format(str(ex)), exc_info=True)
+            logger.warning("Couldn't finish archiving the diagnostic logs; the log file is at '%s'.", troubleshoot_log_path)
+        else:
+            print(f"{colorama.Style.BRIGHT}{colorama.Fore.GREEN}The diagnostic logs have been collected and archived at '{output_file}'.{colorama.Style.RESET_ALL}")
+
+    except Exception as ex:
+        tr_logger.error("Exception caught while running troubleshoot: {}".format(str(ex)), exc_info=True)
+        raise CLIInternalError("Error occurred while troubleshooting: " + str(ex)) from ex
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py
new file mode 100644
index 00000000000..2dcf9bb68b3
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/__init__.py
@@ -0,0 +1,5 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# -----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py
new file mode 100644
index 00000000000..2dcf9bb68b3
--- /dev/null
+++ b/src/k8s-troubleshoot/azext_k8s_troubleshoot/tests/latest/__init__.py
@@ -0,0 +1,5 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. 
class K8s_troubleshootScenarioTest(LiveScenarioTest):
    """Live-only scenario tests for the 'k8s-troubleshoot' extension."""

    # The preparer supplies the `resource_group` keyword argument. Without it the
    # test runner invokes the method with `self` only and the test errors out with
    # a TypeError (missing positional argument) before its body is reached.
    @ResourceGroupPreparer()
    def test_k8s_troubleshoot(self, resource_group):
        """Placeholder scenario: no extension commands are exercised yet."""
        # TODO: add real `az k8s-troubleshoot` scenario steps.
        pass
# --------------------------------------------------------------------------------------------

# Packaging script for the 'k8s-troubleshoot' Azure CLI extension.
# Builds the sdist/wheel from the packages found next to this file and uses
# README.rst + HISTORY.rst as the long description.

import logging

from setuptools import setup, find_packages

try:
    # Optional build hook; the import is attempted only to report its absence.
    from azure_bdist_wheel import cmdclass  # pylint: disable=unused-import
except ImportError:
    # Use stdlib logging rather than distutils.log: distutils is deprecated
    # (PEP 632) and no longer part of the standard library from Python 3.12.
    logging.getLogger(__name__).warning("Wheel is not available, disabling bdist_wheel hook")

# TODO: Confirm this is the right version number you want and it matches your
# HISTORY.rst entry.
VERSION = '0.1.0'

# The full list of classifiers is available at
# https://pypi.python.org/pypi?%3Aaction=list_classifiers
CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Intended Audience :: System Administrators',
    'Programming Language :: Python',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'License :: OSI Approved :: MIT License',
]

DEPENDENCIES = [
    'kubernetes==11.0.0',
    'azure-mgmt-hybridkubernetes',
]

# The builtin open() handles text encodings on Python 3, so the legacy
# codecs.open() shim (which also shadowed the builtin) is not needed.
with open('README.rst', 'r', encoding='utf-8') as f:
    README = f.read()
with open('HISTORY.rst', 'r', encoding='utf-8') as f:
    HISTORY = f.read()

setup(
    name='k8s-troubleshoot',
    version=VERSION,
    description='Microsoft Azure Command-Line Tools K8s-troubleshoot Extension',
    author='Microsoft Corporation',
    author_email='k8connect@microsoft.com',
    url='https://github.com/Azure/azure-cli-extensions/tree/master/src/k8s-troubleshoot',
    long_description=README + '\n\n' + HISTORY,
    license='MIT',
    classifiers=CLASSIFIERS,
    packages=find_packages(),
    install_requires=DEPENDENCIES,
    package_data={'azext_k8s_troubleshoot': ['azext_metadata.json']},
)