From 88161cf62d7cc808ee94466897b127aaeaccf1d6 Mon Sep 17 00:00:00 2001 From: afieffe <97748210+afieffe@users.noreply.github.com> Date: Fri, 7 Apr 2023 10:19:12 +0200 Subject: [PATCH] Adding dedup on images already assessed Skipping images with an evaluation already available in the time windows specified. --- utils/scan_controller.py | 60 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/utils/scan_controller.py b/utils/scan_controller.py index db19466..1edc2ad 100644 --- a/utils/scan_controller.py +++ b/utils/scan_controller.py @@ -156,6 +156,51 @@ def _build_container_query(self, registry=None): return query_text + def _build_container_assessment_cache(self, registry=None): + logger.info('Fetching container assessments since: %s...', self._start_time) + + query = { + 'timeFilter': {'startTime': self._start_time, 'endTime': self._end_time}, + 'filters': [{'field': 'severity', + 'expression': 'in', 'values': + ['Critical', 'High', 'Medium', 'Low', 'Info']}], + 'returns': ['imageId', 'evalCtx'] + } + scanned_containers = [] + vulns_conts = self._lw_client.vulnerabilities.containers.search(json=query) + for page in vulns_conts: + scanned_containers = scanned_containers+page['data'] + + logger.info('Building container assessment cache...') + returned_containers = 0 + scanned_container_cache = {} + + for scanned_container in scanned_containers: + registry = scanned_container['evalCtx']['image_info']['registry'] + repository = scanned_container['evalCtx']['image_info']['repo'] + image_id = scanned_container['imageId'] + # Images scanned by the inline already contains the registry name + if repository.startswith(registry): + qualified_repo = f'{repository}' + else: + qualified_repo = f'{registry}/{repository}' + qualified_repo = qualified_repo.lstrip('/') + qualified_repo = qualified_repo.replace('http://', '')\ + .replace('https://', '') + + if qualified_repo not in scanned_container_cache: + scanned_container_cache[qualified_repo] = [image_id] + else: + if image_id not in scanned_container_cache[qualified_repo]: + scanned_container_cache[qualified_repo].append(image_id) + + returned_containers += 1 + + logger.info('Previously Assessed Container Count: %s', returned_containers) + logger.debug(json.dumps(scanned_container_cache, indent=4)) + + return scanned_container_cache + def _create_inline_scanner_integration(self): logger.info('Creating new Inline Scanner...') @@ -180,7 +225,7 @@ def _deduplicate_scans(self): skipped_containers = 0 temp_scan_queue = [] - + scanned_container_cache = self._build_container_assessment_cache() if self._lw_client.subaccount: if self._lw_client.subaccount not in self.scan_cache.keys(): self.scan_cache[self._lw_client.subaccount] = {} @@ -196,6 +241,14 @@ def _deduplicate_scans(self): qualified_repo = f'{registry}/{repository}' + # Skip if the container was previously scanned in the current window + if qualified_repo in scanned_container_cache: + if image_id in scanned_container_cache[qualified_repo]: + logger.info('Skipping already assessed %s \ + with image ID %s', qualified_repo, image_id) + skipped_containers += 1 + continue + # Skip if the container has a previous scan within the current window if qualified_repo in local_scan_cache.keys(): if image_id in local_scan_cache[qualified_repo].keys(): @@ -332,7 +385,7 @@ def _load_scan_cache(self): with open(self.SCAN_CACHE_FILE, 'r', encoding='utf-8') as scan_cache: try: return json.loads(scan_cache.read()) - except Exception as err: + except ValueError as err: # If this fails, it's likely due to an old cache before expirations # were implemented - Rebuild the cache logger.warning( @@ -469,8 +522,7 @@ def create_scan_task( access_token=self._inline_scanner_access_token, account_name=self._lw_client._account, inline_scanner_path=self._inline_scanner_path, - inline_scanner_prune=self._inline_scanner_prune, - ) + inline_scanner_prune=self._inline_scanner_prune) elif self._proxy_scanner_addr: scan_msg += '(Proxy Scanner)' logger.info(scan_msg)