From 6162a5174d543201524c32a2987a10ecfebf89ca Mon Sep 17 00:00:00 2001 From: Daniel Hass Date: Mon, 12 Jul 2021 13:22:46 +0200 Subject: [PATCH 1/5] added requirements.txt for validator-cli --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ec71fca --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pyyaml +ruamel.yaml +python-Levenshtein \ No newline at end of file From 1431b1f1972c107759f90a6f6c9792bccb73534a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ha=C3=9F?= Date: Mon, 12 Jul 2021 13:24:46 +0200 Subject: [PATCH 2/5] added Dockerfile for validator-cli --- Dockerfile.cli | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 Dockerfile.cli diff --git a/Dockerfile.cli b/Dockerfile.cli new file mode 100644 index 0000000..962f6a5 --- /dev/null +++ b/Dockerfile.cli @@ -0,0 +1,13 @@ +FROM python:3.9-slim-buster + +RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src/app + +COPY requirements.txt validator-cli.py validator.py yamlreader.py typoMistake.py ./ + +RUN pip install --no-cache-dir -r requirements.txt + +CMD [ "/bin/bash", "-c", "python ./validator-cli.py -f docker-compose.yaml -fi 'Duplicate Keys,Top level property,Duplicate ports,Container name,Typing mistakes,DNS,Duplicate expose' -o ./data/output_$(date +%Y-%m-%d).json"] \ No newline at end of file From 95ef973dd5e2946a1c969e2f5c9f32bcc321f21a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ha=C3=9F?= Date: Mon, 12 Jul 2021 13:26:24 +0200 Subject: [PATCH 3/5] first work on mao-integration - added new CLI option -o that saves the output JSON to a file - added warnings and error object for more detailed output of results - fixed several bugs --- validator-cli.py | 6 +++- validator.py | 79 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 24 deletions(-) diff --git a/validator-cli.py b/validator-cli.py index 41d49d7..699dba5 100755 --- a/validator-cli.py +++ b/validator-cli.py @@ -11,6 +11,7 @@ print(" -u: urlbased; direct URL or path specification") print(" -e: eventing; send results to Kafka endpoint with space and series selection") print(" -fi: filters; you can select filters as any as you want:\n for more info flag --fih might be helpful!") + print(" -o: JSON output path; path to which the result JSON will be written") print("Example: {} -a elastest/deploy -e kafka.cloudlab.zhaw.ch/user-1-docker_label_consistency/nightly -fi 'Duplicate Keys,Top level property'".format(sys.argv[0])) sys.exit(1) @@ -20,6 +21,7 @@ urlbased = None eventing = None filters = [] + jsonpath = None i = 1 while i < len(sys.argv): @@ -34,6 +36,8 @@ elif sys.argv[i] == "-fi": filters = sys.argv[i + 1] filters = filters.split(',') + elif sys.argv[i] == "-o": + jsonpath = sys.argv[i + 1] elif sys.argv[i] == "--fih": print("Whole list of fliters is here!\n \n ====> 'Duplicate Keys','Top level property','Duplicate ports','Container name','Labels','Typing mistakes', 'DNS', 'Duplicate expose'\n \n How to use it?
\n\n EZ!\n\n Something like this\n\n python validator-cli.py -a elastest/deploy -fi 'Duplicate Keys,Top level property' \n\n\t *****Warning*****\n\n Makesure that you enter this arg as a string!\n\n\t *****************") sys.exit(1) @@ -46,4 +50,4 @@ i += 1 my_validator = Validator() - my_validator.validator(autosearch, filebasedlist, urlbased, eventing, filebased, filters) \ No newline at end of file + my_validator.validator(autosearch, filebasedlist, urlbased, eventing, filebased, filters, jsonpath) \ No newline at end of file diff --git a/validator.py b/validator.py index f9919af..d8fbce3 100644 --- a/validator.py +++ b/validator.py @@ -13,6 +13,7 @@ import yamlreader import Levenshtein import typoMistake +from collections import Counter try: import kafka except: @@ -200,6 +201,8 @@ def __consistencycheck__(self, contents, labelArray): numservices = 0 alltags = {} faulty = {} + errors = Counter() + warnings = Counter() for content in contents: parsed = yamlreader.reader(contents[content]) @@ -215,7 +218,7 @@ def __consistencycheck__(self, contents, labelArray): faulty[contentname] = 0.0 - c = yaml.load(contents[content]) + c = yaml.load(contents[content], Loader=yaml.FullLoader) if 'Typing mistakes' in labelArray: err_message = "" @@ -263,9 +266,11 @@ def __consistencycheck__(self, contents, labelArray): if 'Container name' in labelArray: if not "container_name" in c["services"][service]: self.__log_writer__("**Warning** no container name found") + warnings['container_name_missing'] += 1 elif c["services"][service]["container_name"] in cachecontainername: self.__log_writer__("Duplicate container name: "+ c["services"][service]["container_name"]) # raise Exception ('Duplicate container name') + warnings['container_name_duplicate'] += 1 else: cachecontainername.append(c["services"][service]["container_name"]) if "volumes" in c["services"][service]: @@ -301,6 +306,8 @@ def __consistencycheck__(self, contents, labelArray): self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("Wrong type {} for volume \nVolume types are: volume, bind, tmpfs, npipe".format(volume['type'])) self.__log_writer__("=============================================") + errors['volume_type'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 if 'source' in volume: if not os.path.exists(volume['source']): if volume['source'] not in cachevolumes: @@ -322,6 +329,7 @@ def __consistencycheck__(self, contents, labelArray): if port_host in cacheports: self.__log_writer__("Duplicate ports in service "+service+ " port "+ str(port_host)) # raise Exception ('Duplicate ports') + warnings['duplicate_ports'] += 1 else: cacheports.append(port_host) if type(port) == type(int()): @@ -350,35 +358,43 @@ def __consistencycheck__(self, contents, labelArray): self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("The DNS is not appropriate!") self.__log_writer__("=============================================") - else: - if ip not in cachedns: - cachedns.append(ip) - else: - self.__log_writer__("=================== ERROR ===================") - self.__log_writer__("Under service: {}".format(service)) - self.__log_writer__("Duplicate DNS!") - self.__log_writer__("=============================================") + errors['dns_malformed'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 + if ip not in cachedns: + cachedns.append(ip) + else: + self.__log_writer__("=================== ERROR ===================") + self.__log_writer__("Under service: {}".format(service)) +
self.__log_writer__("Duplicate DNS!") + self.__log_writer__("=============================================") + errors['dns_duplicate'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 except: self.__log_writer__("=================== ERROR ===================") self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("The DNS is not appropriate!") self.__log_writer__("=============================================") + errors['dns_malformed'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 continue - if type(dns) == type(str()): + elif type(dns) == type(str()): try: - splitedIp = ip.split('.') - for section in splitedIp: - if len(section) > 3 or len(section) < 1: - self.__log_writer__("=================== ERROR ===================") - self.__log_writer__("Under service: {}".format(service)) - self.__log_writer__("The DNS is not appropriate!") - self.__log_writer__("=============================================") + splitedIp = dns.split('.') + if len(splitedIp) != 4: + self.__log_writer__("=================== ERROR ===================") + self.__log_writer__("Under service: {}".format(service)) + self.__log_writer__("The DNS is not appropriate!") + self.__log_writer__("=============================================") + errors['dns_malformed'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 except: self.__log_writer__("=================== ERROR ===================") self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("The DNS is not appropriate!") self.__log_writer__("=============================================") + errors['dns_malformed'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 else: self.__log_writer__("=================== ERROR ===================") @@ -405,6 +421,7 @@ def __consistencycheck__(self, contents, labelArray): expose = c["services"][service]['expose'] if type(expose) == type(list()): for port in expose: + port = int(port) if 1 < port < 65536: if port not in cacheexpose: cacheexpose.append(port) @@ -413,16 +430,23 @@ def __consistencycheck__(self, contents, labelArray): self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("Duplicate port {} exposed!".format(port)) self.__log_writer__("=============================================") + errors['expose_port_duplicate'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 else: self.__log_writer__("=================== ERROR ===================") self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("The port {} that exposed is not appropriate!".format(port)) self.__log_writer__("=============================================") + errors['expose_port_malformed'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 else: self.__log_writer__("=================== ERROR ===================") self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("Value of expose can be a list!") - self.__log_writer__("=============================================") + self.__log_writer__("=============================================") + errors['expose_syntax'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 + if 'depends_on' in c["services"][service]: for denpendecy in c["services"][service]['depends_on']: if denpendecy not in cacheService: self.__log_writer__("=================== ERROR ===================") self.__log_writer__("Under service: {}".format(service)) self.__log_writer__("Wrong dependency! 
There is no such service with name of {}".format(denpendecy)) self.__log_writer__("=============================================") + errors['dependency_missing'] += 1 + faulty[contentname] = faulty.get(contentname, 0) + 1 if 'build' in c["services"][service]: build = c["services"][service]['build'] if type(build) == type(""): @@ -518,7 +544,7 @@ def __consistencycheck__(self, contents, labelArray): faulty[contentname] = faulty.get(contentname, 0) + 1 continue - return numservices, alltags, faulty + return numservices, alltags, faulty, errors, warnings def __sendmessage__(self, host, label, series, message): if kafka.__version__.startswith("0"): @@ -548,11 +574,13 @@ def __sendmessage__(self, host, label, series, message): time.sleep(t) t *= 2 - def validator(self, autosearch, filebasedlist, urlbased, eventing, filebased=None, labelArray=[]): + def validator(self, autosearch, filebasedlist, urlbased, eventing, filebased=None, labelArray=[], jsonpath=None): composefiles = [] d_start = time.time() - cachefiles = self.__loadcache__() + # TODO enable cache again + # cachefiles = self.__loadcache__() + cachefiles = [] if filebasedlist: f = open(filebasedlist) @@ -571,7 +599,7 @@ def validator(self, autosearch, filebasedlist, urlbased, eventing, filebased=Non contents = self.__loading__(cachefiles, composefiles) - numservices, alltags, faulty = self.__consistencycheck__(contents, labelArray) + numservices, alltags, faulty, error_types, warnings = self.__consistencycheck__(contents, labelArray) d_end = time.time() self.__log_writer__("services: {}".format(numservices)) @@ -583,6 +611,8 @@ def validator(self, autosearch, filebasedlist, urlbased, eventing, filebased=Non d = {} d["agent"] = "sentinel-generic-agent" d["services"] = float(numservices) + d["errors"] = error_types + d["warnings"] = warnings for label in alltags: d[label] = float(alltags[label]) d.update(faulty) @@ -590,5 +620,8 @@ def validator(self, autosearch, filebasedlist, urlbased, eventing, filebased=Non kafka, space, series = eventing.split("/") print("sending message... {}".format(d)) self.__sendmessage__(kafka, space, series, json.dumps(d)) + elif jsonpath is not None: + with open(jsonpath, 'w') as outfile: + json.dump(d, outfile) else: - print("not sending message... {}".format(d)) + print("not sending message... {}".format(json.dumps(d))) From 468ae9778d2802bf5c6ab354e7d54bd008968df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ha=C3=9F?= Date: Tue, 27 Jul 2021 14:47:01 +0200 Subject: [PATCH 4/5] fixed versions of Python dependencies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index ec71fca..da1f67f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pyyaml -ruamel.yaml -python-Levenshtein \ No newline at end of file +PyYAML==5.4.1 +ruamel.yaml==0.17.10 +python-Levenshtein==0.12.2 \ No newline at end of file From 2bfa44264805418685b2de6a581e295de55f94f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ha=C3=9F?= Date: Tue, 27 Jul 2021 14:47:20 +0200 Subject: [PATCH 5/5] updated README with CLI container --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 84a6899..2549635 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,13 @@ You are ready to go! YES!
Feel free to add some features! +## CLI Container +The CLI of the Docker Compose Validator has been packaged as a Docker container and can be built via the following command: + +``` +docker build -t <registry>/<image> -f Dockerfile.cli . +``` + ## Note This tool extends the [label consistency checker](https://github.com/serviceprototypinglab/label-consistency) which targets Docker Compose and Kubernetes/OpenShift YAML files. It emerged from [research on microservice quality](https://mao-mao-research.github.io/) at Service Prototyping Lab, Zurich University of Applied Sciences.
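A minimal sketch of running the resulting image, assuming the tag `<registry>/<image>` from the build command above, with the Compose file to validate and a writable `data/` directory mounted into the container's working directory `/usr/src/app` (the default CMD in Dockerfile.cli reads `docker-compose.yaml` from there and writes the result JSON into `./data/`):

```
docker run --rm \
  -v $(pwd)/docker-compose.yaml:/usr/src/app/docker-compose.yaml \
  -v $(pwd)/data:/usr/src/app/data \
  <registry>/<image>
```

The mount paths follow directly from the WORKDIR and CMD defined in Dockerfile.cli; the image tag is only a placeholder.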