diff --git a/src/darjeeling/candidate.py b/src/darjeeling/candidate.py index 669a8e3..7760f90 100644 --- a/src/darjeeling/candidate.py +++ b/src/darjeeling/candidate.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- -__all__ = ('Candidate',) +__all__ = ('Candidate', 'DiffCandidate') -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional import typing from bugzoo.core.patch import Patch import attr -from .core import Replacement, FileLine +from .core import (Replacement, FileLine) from .transformation import Transformation from .util import tuple_from_iterable @@ -19,13 +19,13 @@ class Candidate: """Represents a repair as a set of atomic program transformations.""" problem: 'Problem' = attr.ib(hash=False, eq=False) - transformations: Tuple[Transformation, ...] = \ + transformations: Optional[Tuple[Transformation, ...]] = \ attr.ib(converter=tuple_from_iterable) def to_diff(self) -> Patch: """Transforms this candidate patch into a concrete, unified diff.""" replacements = \ - map(lambda t: t.to_replacement(), self.transformations) + map(lambda t: t.to_replacement(), self.transformations) if self.transformations else {} replacements_by_file: Dict[str, List[Replacement]] = {} for rep in replacements: fn = rep.location.filename @@ -40,7 +40,10 @@ def lines_changed(self) -> List[FileLine]: Returns a list of source lines that are changed by this candidate patch. 
""" - return [t.line for t in self.transformations] + if self.transformations: + return [t.line for t in self.transformations] + else: + return [] @property def id(self) -> str: @@ -50,3 +53,71 @@ def id(self) -> str: def __repr__(self) -> str: return "Candidate<#{}>".format(self.id) + + +@attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) +class DiffPatch: + _file: str = attr.ib() + _patch: Patch = attr.ib(factory=Patch) + + def to_diff(self) -> Patch: + return self._patch + + @property + def files(self) -> List[str]: + return self._patch.files + + @property + def patch(self) -> Patch: + return self._patch + + @property + def file_name(self) -> str: + return self._file + + def __repr__(self) -> str: + return "DiffPatch<{}>".format(self.file_name) + + +@attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) +class DiffCandidate(Candidate): + """Represents a repair as a concrete patch (unified diff) loaded from a file.""" + _diffpatch: DiffPatch = attr.ib() + + def lines_changed(self) -> List[FileLine]: + locs: List[FileLine] = [] + # no accessibility to bugzoo.core.patch subcontent + # lines = [(f.old_fn,l) for f in self.get_file_patches()\ + # for h in f.__hunks \ + # for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))\ + # ] + # for f,l in lines: + # locs.append(FileLine(f,l)) + return locs + + def to_diff(self) -> Patch: + return self._diffpatch.to_diff() + + def get_file_patches(self): + return self._diffpatch._patch._Patch__file_patches + + @property + def diffpatch(self) -> DiffPatch: + return self._diffpatch + + @property + def patch(self) -> Patch: + return self._diffpatch.patch + + @property + def file(self) -> str: + return self._diffpatch.file_name + + @property + def id(self) -> str: + """An eight-character hexadecimal identifier for this candidate.""" + hex_hash = hex(abs(hash(self))) + return hex_hash[2:10] + + def __repr__(self) -> str: + return "DiffCandidate<{}#{}>".format(self.file, self.id) diff --git 
a/src/darjeeling/cli/__init__.py b/src/darjeeling/cli/__init__.py index 1794f6f..e0042a6 100644 --- a/src/darjeeling/cli/__init__.py +++ b/src/darjeeling/cli/__init__.py @@ -16,11 +16,11 @@ from ..environment import Environment from ..problem import Problem from ..version import __version__ as VERSION -from ..config import Config +from ..config import Config, EvaluateConfig from ..events import CsvEventLogger, WebSocketEventHandler from ..plugins import LOADED_PLUGINS from ..resources import ResourceUsageTracker -from ..session import Session +from ..session import Session, EvaluateSession from ..exceptions import BadConfigurationException from ..util import duration_str @@ -96,6 +96,18 @@ def _default_log_filename(self) -> str: num = max(used_numbers) + 1 return os.path.join(os.getcwd(), 'darjeeling.log.{}'.format(num)) + @property + def _default_eval_log_filename(self) -> str: + # find all log file numbers that have been used in this directory + used_numbers = [int(s.rpartition('.')[-1]) + for s in glob.glob('evaluation.log.[0-9]*')] + + if not used_numbers: + return os.path.join(os.getcwd(), 'evaluation.log.0') + + num = max(used_numbers) + 1 + return os.path.join(os.getcwd(), 'evaluation.log.{}'.format(num)) + @cement.ex( help='generates a test suite coverage report for a given problem', arguments=[ @@ -301,6 +313,63 @@ def repair(self) -> bool: else: sys.exit(1) + @cement.ex( + help='evaluate a repair specified by patch using additional criteria', + arguments=[ + (['filename'], + {'help': ('a Darjeeling configuration file describing a faulty ' + 'program and how it should be repaired.')}), + (['--patch-dir'], + {'help': 'path containing patches to restore 
and evaluate.', + 'dest': 'dir_patches', + 'type': str}), + (['--log-to-file'], + {'help': 'path to store the log file.', + 'type': str}), + (['--threads'], + {'dest': 'threads', + 'type': int, + 'help': ('number of threads over which the repair workload ' + 'should be distributed')}) + ] + ) + def evaluate(self) -> None: + """Evaluates a given program.""" + # load the configuration file + filename = self.app.pargs.filename + filename = os.path.abspath(filename) + cfg_dir = os.path.dirname(filename) + dir_patches: Optional[str] = self.app.pargs.dir_patches + threads: Optional[int] = self.app.pargs.threads + log_to_filename: Optional[str] = self.app.pargs.log_to_file + + logger.remove() + logger.enable('darjeeling') + for plugin_name in LOADED_PLUGINS: + logger.enable(plugin_name) + + with open(filename, 'r') as f: + yml = yaml.safe_load(f) + + if not log_to_filename: + log_to_filename = self._default_eval_log_filename + logger.info(f'logging to file: {log_to_filename}') + logger.add(log_to_filename, level='TRACE') + cfg = EvaluateConfig.from_yml(yml=yml, + dir_=cfg_dir, + dir_patches=dir_patches, + threads=threads) + + with Environment() as environment: + try: + session = EvaluateSession.from_config(environment, cfg) + except BadConfigurationException: + print("ERROR: bad configuration file") + sys.exit(1) + + session.run() + session.close() + class CLI(cement.App): class Meta: diff --git a/src/darjeeling/config.py b/src/darjeeling/config.py index 904b383..b435e50 100644 --- a/src/darjeeling/config.py +++ b/src/darjeeling/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __all__ = ('Config', 'OptimizationsConfig', 'CoverageConfig', - 'LocalizationConfig') + 'LocalizationConfig', 'EvaluateConfig') from typing import Any, Collection, Dict, List, NoReturn, Optional, Set import datetime @@ -138,10 +138,10 @@ class Config: """ dir_patches: str = attr.ib() program: ProgramDescriptionConfig - transformations: ProgramTransformationsConfig - localization: LocalizationConfig + 
transformations: Optional[ProgramTransformationsConfig] + localization: Optional[LocalizationConfig] search: SearcherConfig - coverage: CoverageConfig + coverage: Optional[CoverageConfig] resource_limits: ResourceLimits seed: int = attr.ib(default=0) optimizations: OptimizationsConfig = attr.ib(factory=OptimizationsConfig) @@ -240,7 +240,7 @@ def err(m: str) -> NoReturn: # coverage config if 'coverage' in yml: if plus: - yml['coverage']['method']['type']='plus' + yml['coverage']['method']['type'] = 'plus' coverage = CoverageConfig.from_dict(yml['coverage'], dir_) else: m = "'coverage' section is expected" @@ -279,3 +279,93 @@ def err(m: str) -> NoReturn: search=search, optimizations=opts, dir_patches=dir_patches) + + +@attr.s(frozen=True, auto_attribs=True) +class EvaluateConfig(Config): + """A configuration for Darjeeling to evaluate patches with additional content. + + Attributes + ---------- + dir_patches: str + The absolute path to the directory to which patches are saved. + seed: int + The seed that should be used by the random number generator. + threads: int + The number of threads over which the search should be distributed. + program: ProgramDescriptionConfig + A description of the program under transformation. + resource_limits: ResourceLimits + Limits on the resources that may be consumed during the search. + + """ + # search: SearcherConfig + # program: ProgramDescriptionConfig + # resource_limits: ResourceLimits + # dir_patches: str = attr.ib() + # threads: int = attr.ib(default=1) + + @staticmethod + def from_yml(yml: Dict[str, Any], + dir_: Optional[str] = None, + *, + terminate_early: bool = True, + plus: bool = False, + seed: Optional[int] = None, + threads: Optional[int] = None, + run_redundant_tests: bool = False, + limit_candidates: Optional[int] = None, + limit_time_minutes: Optional[int] = None, + dir_patches: Optional[str] = None + ) -> 'EvaluateConfig': + """Loads a configuration from a YAML dictionary. 
+ + Raises + ------ + BadConfigurationException + If an illegal configuration is provided. + """ + def err(m: str) -> NoReturn: + raise BadConfigurationException(m) + + if dir_patches is None and 'save-patches-to' in yml: + dir_patches = yml['save-patches-to'] + if not isinstance(dir_patches, str): + err("'save-patches-to' property should be a string") + if not os.path.isabs(dir_patches): + if not dir_: + err("'save-patches-to' must be absolute for non-file-based configurations") + dir_patches = os.path.join(dir_, dir_patches) + elif dir_patches is None: + if not dir_: + err("'save-patches-to' must be specified for non-file-based configurations") + dir_patches = os.path.join(dir_, 'patches') + + if threads is None and 'threads' in yml: + if not isinstance(yml['threads'], int): + err("'threads' property should be an int") + threads = yml['threads'] + elif threads is None: + threads = 1 + + # resource limits + yml.setdefault('resource-limits', {}) + + resource_limits = \ + ResourceLimits.from_dict(yml['resource-limits'], dir_) + + if 'program' not in yml: + err("'program' section is missing") + program = ProgramDescriptionConfig.from_dict(dict_=yml['program'], dir_=dir_, heldout=True) + + search = SearcherConfig.from_dict({'type': 'reviewer'}, dir_) + + return EvaluateConfig(threads=threads, + program=program, + search=search, + dir_patches=dir_patches, + resource_limits=resource_limits, + transformations=None, + localization=None, + coverage=None + ) diff --git a/src/darjeeling/coverage/config.py b/src/darjeeling/coverage/config.py index 6bd8953..f9d271f 100644 --- a/src/darjeeling/coverage/config.py +++ b/src/darjeeling/coverage/config.py @@ -112,7 +112,7 @@ def build(self, # exclude yacc and lex files def is_yacc_or_lex_file(filename: str) -> bool: return filename.endswith(".y") or filename.endswith(".l") - + covered_files = set(filename for test_coverage in coverage.values() for filename in test_coverage.lines.files) restrict_to_files = set(filename for filename 
in covered_files if not is_yacc_or_lex_file(filename)) coverage = coverage.restrict_to_files(restrict_to_files) diff --git a/src/darjeeling/coverage/gcov.py b/src/darjeeling/coverage/gcov.py index e124afc..94dd98a 100644 --- a/src/darjeeling/coverage/gcov.py +++ b/src/darjeeling/coverage/gcov.py @@ -34,7 +34,7 @@ " exit(sig);\n" "}\n" "void __attribute__ ((constructor)) darjeeling_ctor (void) {\n" - #"void darjeeling_ctor (void) {\n" + # "void darjeeling_ctor (void) {\n" " struct sigaction new_action;\n" " new_action.sa_handler = darjeeling_sighandler;\n" " sigemptyset(&new_action.sa_mask);\n" @@ -132,8 +132,6 @@ def _find_source_filenames(self, """Determines the set of all source files within a program.""" with program.provision() as container: source_directory = program.source_directory - build_directory = program.build_directory - src_subdirectory = program.src_subdirectory endings = ('.cpp', '.cc', '.c', '.h', '.hh', '.hpp', '.cxx') command = ' -o '.join([f"-name \*{e}" for e in endings]) command = f'find {source_directory} -type f \( {command} \)' @@ -228,7 +226,7 @@ def _resolve_filepath(self, filename_relative: str) -> str: def _resolve_filepath_pdr(self, base_filename: str) -> str: base = os.path.basename(base_filename) # may make sense to check for duplicate basenames, but TBD - source_lut = { os.path.basename(fn): fn for fn in self._source_filenames } + source_lut = {os.path.basename(fn): fn for fn in self._source_filenames} return os.path.relpath(source_lut.get(base, base), self._source_directory) def _parse_xml_report(self, root: ET.Element) -> FileLineSet: @@ -268,12 +266,12 @@ def _extract(self, container: 'ProgramContainer') -> FileLineSet: command = f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' if self._src_subdirectory and self._src_subdirectory != "": - command+=f" {self._src_subdirectory} " + command += f" {self._src_subdirectory} " logger.trace(f"executing gcovr command: {command}") - fpath=self._build_directory + fpath 
= self._build_directory logger.info(f"executing gcovr command: '{command}' in '{fpath}'") - #gcda=shell.check_output("find . -type f -name \"*.gcda\"", cwd=fpath) - #logger.info(f"GCDA: \n>>>{gcda}\n<<<") + # gcda=shell.check_output("find . -type f -name \"*.gcda\"", cwd=fpath) + # logger.info(f"GCDA: \n>>>{gcda}\n<<<") shell.check_call(command, cwd=fpath) xml_file_contents = files.read(temporary_filename) logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") diff --git a/src/darjeeling/coverage/plus.py b/src/darjeeling/coverage/plus.py index d6d8c30..17d7fed 100644 --- a/src/darjeeling/coverage/plus.py +++ b/src/darjeeling/coverage/plus.py @@ -3,21 +3,21 @@ import os import typing as t -import xml.etree.ElementTree as ET +# import xml.etree.ElementTree as ET from loguru import logger import attr from .collector import CoverageCollector, CoverageCollectorConfig from ..core import FileLineSet -from ..source import ProgramSourceFile +# from ..source import ProgramSourceFile if t.TYPE_CHECKING: from ..container import ProgramContainer from ..environment import Environment from ..program import ProgramDescription -#_INSTRUMENTATION = ( +# _INSTRUMENTATION = ( # "/* DARJEELING :: INSTRUMENTATION :: START */\n" # "#include \n" # "#include \n" @@ -51,9 +51,9 @@ # " signal(SIGUSR2, darjeeling_sighandler);\n" # "}\n" # "/* DARJEELING :: INSTRUMENTATION :: END */\n" -#) -#_NUM_INSTRUMENTATION_LINES = _INSTRUMENTATION.count('\n') -#_LINES_TO_REMOVE = set(range(1, _NUM_INSTRUMENTATION_LINES)) +# ) +# _NUM_INSTRUMENTATION_LINES = _INSTRUMENTATION.count('\n') +# _LINES_TO_REMOVE = set(range(1, _NUM_INSTRUMENTATION_LINES)) # # @attr.s(auto_attribs=True, slots=True) @@ -117,8 +117,8 @@ def _find_source_filenames(self, """Determines the set of all source files within a program.""" with program.provision() as container: source_directory = program.source_directory - build_directory = program.build_directory - src_subdirectory = program.src_subdirectory + # build_directory = 
program.build_directory + # src_subdirectory = program.src_subdirectory endings = ('.cpp', '.cc', '.c', '.h', '.hh', '.hpp', '.cxx') command = ' -o '.join([f"-name \*{e}" for e in endings]) command = f'find {source_directory} -type f \( {command} \)' @@ -160,172 +160,172 @@ class PlusCollector(CoverageCollector): _source_filenames: t.FrozenSet[str] _environment: 'Environment' = attr.ib(repr=False) - #def _read_line_coverage_for_class(self, xml_class: ET.Element) -> t.Set[int]: - # xml_lines = xml_class.find('lines') - # assert xml_lines - # lines = xml_lines.findall('line') - # return set(int(line.attrib['number']) for line in lines - # if int(line.attrib['hits']) > 0) + # def _read_line_coverage_for_class(self, xml_class: ET.Element) -> t.Set[int]: + # xml_lines = xml_class.find('lines') + # assert xml_lines + # lines = xml_lines.findall('line') + # return set(int(line.attrib['number']) for line in lines + # if int(line.attrib['hits']) > 0) - #def _corrected_lines(self, - # relative_filename: str, - # lines: t.Set[int] - # ) -> t.Set[int]: - # if os.path.isabs(relative_filename): - # absolute_filename = relative_filename - # else: - # absolute_filename = os.path.join(self._source_directory, relative_filename) + # def _corrected_lines(self, + # relative_filename: str, + # lines: t.Set[int] + # ) -> t.Set[int]: + # if os.path.isabs(relative_filename): + # absolute_filename = relative_filename + # else: + # absolute_filename = os.path.join(self._source_directory, relative_filename) # - # instrumented_filenames = set(f.filename for f in self._files_to_instrument) - # if absolute_filename not in instrumented_filenames: - # logger.trace(f"file was not instrumented: {absolute_filename}") - # return lines + # instrumented_filenames = set(f.filename for f in self._files_to_instrument) + # if absolute_filename not in instrumented_filenames: + # logger.trace(f"file was not instrumented: {absolute_filename}") + # return lines # - # lines = lines - _LINES_TO_REMOVE - # return 
set(i - _NUM_INSTRUMENTATION_LINES for i in lines) + # lines = lines - _LINES_TO_REMOVE + # return set(i - _NUM_INSTRUMENTATION_LINES for i in lines) + + # def _has_source_file(self, filename_relative: str) -> bool: + # source_directory = self._source_directory + # filename_absolute = os.path.join(source_directory, filename_relative) + # return filename_absolute in self._source_filenames - #def _has_source_file(self, filename_relative: str) -> bool: - # source_directory = self._source_directory - # filename_absolute = os.path.join(source_directory, filename_relative) - # return filename_absolute in self._source_filenames - def get_relative_filename(self, filename_absolute: str) -> str: from re import sub - relative_filename = sub(self._source_directory+"/", "", filename_absolute) + relative_filename = sub(self._source_directory + "/", "", filename_absolute) return relative_filename def _has_source_file(self, filename_absolute: str) -> bool: return filename_absolute in self._source_filenames - ## FIXME is this a general solution? nope, not a general solution - #def _resolve_filepath(self, filename_relative: str) -> str: - # if not filename_relative: - # raise ValueError('failed to resolve path') - # if self._has_source_file(filename_relative): - # return filename_relative - # - # filename_relative_child = '/'.join(filename_relative.split('/')[1:]) - # return self._resolve_filepath(filename_relative_child) + # # FIXME is this a general solution? 
nope, not a general solution + # def _resolve_filepath(self, filename_relative: str) -> str: + # if not filename_relative: + # raise ValueError('failed to resolve path') + # if self._has_source_file(filename_relative): + # return filename_relative + # + # filename_relative_child = '/'.join(filename_relative.split('/')[1:]) + # return self._resolve_filepath(filename_relative_child) - #def _resolve_filepath_pdr(self, base_filename: str) -> str: - # src_file=os.path.join(self._src_subdirectory,base_filename) - # return self._resolve_filepath(src_file) + # def _resolve_filepath_pdr(self, base_filename: str) -> str: + # src_file=os.path.join(self._src_subdirectory,base_filename) + # return self._resolve_filepath(src_file) - #def _parse_xml_report(self, root: ET.Element) -> FileLineSet: - # packages_node = root.find('packages') - # assert packages_node - # package_nodes = packages_node.findall('package') - # class_nodes = [c for p in package_nodes for c in p.find('classes').findall('class')] # type: ignore + # def _parse_xml_report(self, root: ET.Element) -> FileLineSet: + # packages_node = root.find('packages') + # assert packages_node + # package_nodes = packages_node.findall('package') + # class_nodes = [c for p in package_nodes for c in p.find('classes').findall('class')] # type: ignore # - # filename_to_lines: t.Dict[str, t.Set[int]] = {} - # for node in class_nodes: - # filename = node.attrib['filename'] - # try: - # filename_original = filename - # filename = self._resolve_filepath_pdr(filename) - # logger.trace(f"resolving path '{filename_original}' " - # f"-> '{filename}'") - # except ValueError: - # logger.warning(f'failed to resolve file: {filename}') - # continue + # filename_to_lines: t.Dict[str, t.Set[int]] = {} + # for node in class_nodes: + # filename = node.attrib['filename'] + # try: + # filename_original = filename + # filename = self._resolve_filepath_pdr(filename) + # logger.trace(f"resolving path '{filename_original}' " + # f"-> '{filename}'") + # 
except ValueError: + # logger.warning(f'failed to resolve file: {filename}') + # continue # - # lines = self._read_line_coverage_for_class(node) - # lines = self._corrected_lines(filename, lines) - # if lines: - # filename_to_lines[filename] = lines + # lines = self._read_line_coverage_for_class(node) + # lines = self._corrected_lines(filename, lines) + # if lines: + # filename_to_lines[filename] = lines # - # return FileLineSet(filename_to_lines) + # return FileLineSet(filename_to_lines) - def obtain_faults(self,fplus:dict) -> FileLineSet: + def obtain_faults(self, fplus: dict) -> FileLineSet: ftl: t.Dict[str, t.Set[int]] = {} logger.info(f"self._source_directory: {self._source_directory}") if fplus: - for idx in ['addsans','ubsans']: - floc=fplus.get(idx,None) + for idx in ['addsans', 'ubsans']: + floc = fplus.get(idx, None) if floc: - for f in floc: - traces=f.get('trace',None) - if traces: - for trace in traces: - if trace: - fname=trace[0] - fdir =trace[1] - fline=trace[2] - fcol =trace[3] - ffun =trace[4] - if fname != "" and fdir != "": - try: - fpath = os.path.join(fdir, fname) - absolute_filename = os.path.abspath(fpath) - rel_file=self.get_relative_filename(absolute_filename) - x=ftl.get(rel_file,None) - if not x: - ftl[rel_file]=set() - ftl[rel_file].add(fline) - logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") - except Exception as e: - raise(e) - else: - loc=f.get('loc',None) - if loc: - fname=loc[0] - fdir =loc[1] if isinstance(loc[1],str) else loc[1][0] - fline=loc[2] - fcol =loc[3] - ffun =loc[4] - if fname != "" and fdir != "": - try: - fpath = os.path.join(fdir, fname) - absolute_filename = os.path.abspath(fpath) - rel_file=self.get_relative_filename(absolute_filename) - x=ftl.get(rel_file,None) - if not x: - ftl[rel_file]=set() - ftl[rel_file].add(fline) - logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") - except Exception as e: - raise(e) + for f in floc: + traces = f.get('trace', None) + if traces: + 
for trace in traces: + if trace: + fname = trace[0] + fdir = trace[1] + fline = trace[2] + fcol = trace[3] + ffun = trace[4] + if fname != "" and fdir != "": + try: + fpath = os.path.join(fdir, fname) + absolute_filename = os.path.abspath(fpath) + rel_file = self.get_relative_filename(absolute_filename) + x = ftl.get(rel_file, None) + if not x: + ftl[rel_file] = set() + ftl[rel_file].add(fline) + logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") + except Exception as e: + raise(e) + else: + loc = f.get('loc', None) + if loc: + fname = loc[0] + fdir = loc[1] if isinstance(loc[1], str) else loc[1][0] + fline = loc[2] + fcol = loc[3] + ffun = loc[4] + if fname != "" and fdir != "": + try: + fpath = os.path.join(fdir, fname) + absolute_filename = os.path.abspath(fpath) + rel_file = self.get_relative_filename(absolute_filename) + x = ftl.get(rel_file, None) + if not x: + ftl[rel_file] = set() + ftl[rel_file].add(fline) + logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") + except Exception as e: + raise(e) return FileLineSet(ftl) - #def _parse_xml_file_contents(self, contents: str) -> FileLineSet: + # def _parse_xml_file_contents(self, contents: str) -> FileLineSet: # logger.trace(f"Parsing gcovr report:\n{contents}") # root = ET.fromstring(contents) # return self._parse_xml_report(root) def _extract(self, container: 'ProgramContainer') -> FileLineSet: files = container.filesystem - fplus="/benchmarks/SanitizerResults/bothSan.json" - fplusd=None + fplus = "/benchmarks/SanitizerResults/bothSan.json" + fplusd = None try: - json_in=files.read(fplus) + json_in = files.read(fplus) import json - fplusd=json.loads(json_in) + fplusd = json.loads(json_in) except Exception as e: logger.error(f'Darjeeling Plus localization issue\nException:\n{e}') raise(e) return self.obtain_faults(fplusd) - #shell = container.shell - #temporary_filename = files.mktemp() + # shell = container.shell + # temporary_filename = files.mktemp() + # + # command = 
f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' + # if self._src_subdirectory and self._src_subdirectory != "": + # command+=f" {self._src_subdirectory} " + # logger.trace(f"executing gcovr command: {command}") + # # fpath=os.path.join(self._build_directory,self._src_subdirectory) + # fpath=self._build_directory + # logger.info(f"executing gcovr command: '{command}' in '{fpath}'") + # shell.check_call(command, cwd=fpath) + # xml_file_contents = files.read(temporary_filename) + # logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") # - #command = f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' - #if self._src_subdirectory and self._src_subdirectory != "": - # command+=f" {self._src_subdirectory} " - #logger.trace(f"executing gcovr command: {command}") - ##fpath=os.path.join(self._build_directory,self._src_subdirectory) - #fpath=self._build_directory - #logger.info(f"executing gcovr command: '{command}' in '{fpath}'") - #shell.check_call(command, cwd=fpath) - #xml_file_contents = files.read(temporary_filename) - #logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") - # - #return self._parse_xml_file_contents(xml_file_contents) + # return self._parse_xml_file_contents(xml_file_contents) - #def _instrument( + # def _instrument( # self, # filename: str, # contents: str, # inject_at_line: int, - #) -> str: + # ) -> str: # file_ = ProgramSourceFile(filename, contents) # inject_at_location = file_.line_to_location_range(inject_at_line).start # inject_at_offset = file_.location_to_offset(inject_at_location) @@ -336,7 +336,7 @@ def _prepare(self, container: 'ProgramContainer') -> None: """ """ pass - #def _prepare(self, container: 'ProgramContainer') -> None: + # def _prepare(self, container: 'ProgramContainer') -> None: # """ # Adds source code instrumentation and recompiles the program inside # a container using the appropriate GCC options. 
Also ensures that diff --git a/src/darjeeling/evaluator.py b/src/darjeeling/evaluator.py index 2a85fac..ca89bb9 100644 --- a/src/darjeeling/evaluator.py +++ b/src/darjeeling/evaluator.py @@ -18,7 +18,7 @@ from loguru import logger from . import exceptions as exc -from .candidate import Candidate +from .candidate import Candidate, DiffCandidate from .container import ProgramContainer from .outcome import (BuildOutcome, CandidateOutcome, CandidateOutcomeStore, TestOutcome, TestOutcomeSet) @@ -118,22 +118,26 @@ def _filter_redundant_tests(self, candidate: Candidate, tests: List[Test] ) -> Tuple[List[Test], Set[Test]]: - line_coverage_by_test = self.__problem.coverage - lines_changed = candidate.lines_changed() + if self.__problem.coverage: + line_coverage_by_test = self.__problem.coverage + lines_changed = candidate.lines_changed() - # if no lines are changed, retain all tests (fixes issue #128) - if not lines_changed: - return (tests, set()) + # if no lines are changed, retain all tests (fixes issue #128) + if not lines_changed: + return (tests, set()) - keep: List[Test] = [] - drop: Set[Test] = set() - for test in tests: - test_line_coverage = line_coverage_by_test[test.name] - if not any(line in test_line_coverage for line in lines_changed): - drop.add(test) - else: - keep.append(test) - return (keep, drop) + keep: List[Test] = [] + drop: Set[Test] = set() + for test in tests: + test_line_coverage = line_coverage_by_test[test.name] + if not any(line in test_line_coverage for line in lines_changed): + drop.add(test) + else: + keep.append(test) + return (keep, drop) + else: + logger.warning("Attempting to run coverage-based evaluation on incompatible configuration") + return (tests, set()) def _run_test(self, container: ProgramContainer, @@ -161,10 +165,12 @@ def _run_test(self, outcome = TestOutcome(successful=False, time_taken=timer.duration) + id_ = " heldout" if isinstance(candidate, DiffCandidate) else "" + if not outcome.successful: - logger.debug(f"* test 
failed: {test.name} ({candidate})") + logger.debug(f"*{id_} test failed: {test.name} ({candidate})") else: - logger.debug(f"* test passed: {test.name} ({candidate})") + logger.debug(f"*{id_} test passed: {test.name} ({candidate})") self.dispatch(TestExecutionFinished(candidate, test, outcome)) return outcome diff --git a/src/darjeeling/events/event.py b/src/darjeeling/events/event.py index f66d0bc..93b9dfa 100644 --- a/src/darjeeling/events/event.py +++ b/src/darjeeling/events/event.py @@ -8,6 +8,7 @@ TestOutcome as _TestOutcome, BuildOutcome as _BuildOutcome) from ..candidate import Candidate as _Candidate + from ..outcome import CandidateOutcome as _CandidateOutcome diff --git a/src/darjeeling/localization.py b/src/darjeeling/localization.py index 991b7c8..6c9d252 100644 --- a/src/darjeeling/localization.py +++ b/src/darjeeling/localization.py @@ -150,7 +150,7 @@ def from_config(coverage: TestCoverageMap, m = f"suspiciousness metric not supported: {cfg.metric}" raise BadConfigurationException(m) logger.info(f"using suspiciousness metric: {cfg.metric}") - #logger.info(f"coverage: {str(coverage)}") + # logger.info(f"coverage: {str(coverage)}") logger.debug(f"coverage: {str(coverage)}") loc = Localization.from_coverage(coverage, metric) diff --git a/src/darjeeling/problem.py b/src/darjeeling/problem.py index d4516ba..25ed079 100644 --- a/src/darjeeling/problem.py +++ b/src/darjeeling/problem.py @@ -14,6 +14,7 @@ from .source import ProgramSource, ProgramSourceLoader from .exceptions import NoFailingTests, NoImplicatedLines + if typing.TYPE_CHECKING: from .config import Config, OptimizationsConfig from .core import Language, TestCoverageMap @@ -52,25 +53,55 @@ class Problem: environment: 'Environment' config: 'Config' language: 'Language' - coverage: 'TestCoverageMap' + coverage: 'Optional[TestCoverageMap]' sources: ProgramSource program: 'ProgramDescription' failing_tests: Sequence[Test] passing_tests: Sequence[Test] test_ordering: Iterable[Test] analysis: 
Optional[Analysis] - localization: 'Localization' + localization: 'Optional[Localization]' @staticmethod def build(environment: 'Environment', config: 'Config', language: 'Language', - coverage: 'TestCoverageMap', + coverage: 'Optional[TestCoverageMap]', program: 'ProgramDescription', - localization: 'Localization', + localization: 'Optional[Localization]', *, analysis: Optional[Analysis] = None, + patch_files: set = set(), ) -> 'Problem': + if coverage and localization: + return Problem.build_default( + environment, + config, + language, + coverage, + program, + localization, + analysis=analysis + ) + else: + return Problem.build_evaluation( + environment, + config, + language, + program, + patch_files=patch_files + ) + + @staticmethod + def build_default(environment: 'Environment', + config: 'Config', + language: 'Language', + coverage: 'TestCoverageMap', + program: 'ProgramDescription', + localization: 'Localization', + *, + analysis: Optional[Analysis] = None + ) -> 'Problem': """Constructs a problem description. Raises @@ -145,6 +176,49 @@ def ordering(x: Test, y: Test) -> int: problem.validate() return problem + @staticmethod + def build_evaluation(environment: 'Environment', + config: 'Config', + language: 'Language', + program: 'ProgramDescription', + *, + patch_files: set, + ) -> 'Problem': + """Constructs a Problem description based on Patch file for evaluation only. 
+ + Raises + ------- + """ + + passing_tests: Sequence[Test] = tuple(program.tests) + + failing_tests: Sequence[Test] = tuple() + + logger.info("ordering test cases") + test_ordering: Sequence[Test] = \ + tuple(program.tests) + logger.info('test order: {}', ', '.join(t.name for t in test_ordering)) + + logger.debug("storing contents of source code files") + source_files = set(patch_files) + source_loader = ProgramSourceLoader(environment) + sources = source_loader.for_program(program, files=source_files) + logger.debug("stored contents of source code files") + + solution = Problem(environment=environment, + program=program, + language=language, + sources=sources, + config=config, + passing_tests=passing_tests, + failing_tests=failing_tests, + test_ordering=test_ordering, + analysis=None, + localization=None, + coverage=None + ) + return solution + def validate(self) -> None: """ Ensures that this repair problem is valid. To be considered valid, a @@ -183,8 +257,10 @@ def lines(self) -> Iterator[FileLine]: Returns an iterator over the lines that are implicated by the description of this problem. 
""" - yield from self.coverage.failing.locations + if self.coverage: + yield from self.coverage.failing.locations @property def implicated_files(self) -> Iterator[str]: - yield from set(location.filename for location in self.coverage.failing.locations) + if self.coverage: + yield from set(location.filename for location in self.coverage.failing.locations) diff --git a/src/darjeeling/program.py b/src/darjeeling/program.py index 328b6ca..5b3c142 100644 --- a/src/darjeeling/program.py +++ b/src/darjeeling/program.py @@ -35,7 +35,8 @@ class ProgramDescriptionConfig: @staticmethod def from_dict(dict_: Mapping[str, Any], - dir_: Optional[str] = None + dir_: Optional[str] = None, + heldout: Optional[bool] = False ) -> 'ProgramDescriptionConfig': def err(message: str) -> NoReturn: raise exc.BadConfigurationException(message) @@ -81,11 +82,15 @@ def err(message: str) -> NoReturn: err(f"unsupported language [{dict_['language']}]. {supported}") # test suite - if 'tests' not in dict_: - err("'tests' section is missing from 'program' section") - if not isinstance(dict_['tests'], dict): - err("'tests' section should be an object") - tests = TestSuiteConfig.from_dict(dict_.get('tests', {}), dir_) + # populate with 'heldout-tests' content only when specified + # 'tests' is default behavior + tests_key = 'tests' if not heldout else 'heldout-tests' + + if tests_key not in dict_: + err(f"'{tests_key} section is missing from 'program' section") + if not isinstance(dict_[tests_key], dict): + err(f"'{tests_key}' section should be an object") + tests = TestSuiteConfig.from_dict(dict_.get(tests_key, {}), dir_) # build instructions if 'build-instructions' not in dict_: diff --git a/src/darjeeling/resources.py b/src/darjeeling/resources.py index 8a78ed6..1aa7186 100644 --- a/src/darjeeling/resources.py +++ b/src/darjeeling/resources.py @@ -46,7 +46,8 @@ def check_limits(self) -> None: ResourceLimitReached If a resource limit has been reached. 
""" - self.limits.check(self) + if self.limits: + self.limits.check(self) class ResourceLimit(abc.ABC): diff --git a/src/darjeeling/searcher/__init__.py b/src/darjeeling/searcher/__init__.py index 6b9dbe1..f172764 100644 --- a/src/darjeeling/searcher/__init__.py +++ b/src/darjeeling/searcher/__init__.py @@ -3,3 +3,4 @@ from .base import Searcher from .exhaustive import ExhaustiveSearcher from .genetic import GeneticSearcher +from .reviewer import Reviewer diff --git a/src/darjeeling/searcher/config.py b/src/darjeeling/searcher/config.py index 19ae517..7c9eb1a 100644 --- a/src/darjeeling/searcher/config.py +++ b/src/darjeeling/searcher/config.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- __all__ = ('SearcherConfig',) -from typing import Dict, Optional, Any, Type, Iterator +from typing import Dict, Optional, Any, Type, Iterator, List import abc import typing @@ -12,6 +12,7 @@ from ..problem import Problem from ..resources import ResourceUsageTracker from ..transformation import ProgramTransformations + from ..candidate import DiffPatch @dynamically_registered(lookup='lookup') @@ -43,8 +44,9 @@ def from_dict(cls, def build(self, problem: 'Problem', resources: 'ResourceUsageTracker', - transformations: 'ProgramTransformations', *, + transformations: 'Optional[ProgramTransformations]' = None, + candidates: 'Optional[List[DiffPatch]]' = None, threads: int = 1, run_redundant_tests: bool = False ) -> 'Searcher': diff --git a/src/darjeeling/searcher/exhaustive.py b/src/darjeeling/searcher/exhaustive.py index 9006064..de2dac7 100644 --- a/src/darjeeling/searcher/exhaustive.py +++ b/src/darjeeling/searcher/exhaustive.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- __all__ = ('ExhaustiveSearcher',) -from typing import Any, Dict, Iterable, Iterator, Optional +from typing import Any, Dict, Iterable, Iterator, Optional, List import typing from loguru import logger @@ -10,12 +10,12 @@ from .config import SearcherConfig from ..candidate import Candidate from ..resources import 
ResourceUsageTracker -from ..transformation import Transformation +from ..transformation import Transformation, ProgramTransformations from ..exceptions import SearchExhausted if typing.TYPE_CHECKING: from ..problem import Problem - from ..transformations import ProgramTransformations + from ..candidate import DiffPatch class ExhaustiveSearcherConfig(SearcherConfig): @@ -38,16 +38,20 @@ def from_dict(cls, def build(self, problem: 'Problem', resources: ResourceUsageTracker, - transformations: 'ProgramTransformations', + transformations: 'ProgramTransformations' = None, *, + candidates: 'List[DiffPatch]' = None, threads: int = 1, run_redundant_tests: bool = False ) -> Searcher: + if not transformations: + transformations = ProgramTransformations.build([], problem) return ExhaustiveSearcher(problem=problem, resources=resources, transformations=transformations, threads=threads, - run_redundant_tests=run_redundant_tests) + run_redundant_tests=run_redundant_tests + ) class ExhaustiveSearcher(Searcher): diff --git a/src/darjeeling/searcher/genetic.py b/src/darjeeling/searcher/genetic.py index d61293e..f5ad304 100644 --- a/src/darjeeling/searcher/genetic.py +++ b/src/darjeeling/searcher/genetic.py @@ -10,7 +10,8 @@ from .base import Searcher from .config import SearcherConfig -from ..candidate import Candidate +from ..candidate import Candidate, DiffPatch + from ..resources import ResourceUsageTracker from ..transformation import Transformation, ProgramTransformations from ..outcome import CandidateOutcome @@ -53,12 +54,15 @@ def from_dict(cls, def build(self, problem: 'Problem', - resources: ResourceUsageTracker, - transformations: ProgramTransformations, + resources: 'ResourceUsageTracker', + transformations: 'ProgramTransformations' = None, *, + candidates: 'Optional[List[DiffPatch]]' = None, threads: int = 1, - run_redundant_tests: bool = False + run_redundant_tests: bool = False, ) -> Searcher: + if not transformations: + transformations = 
ProgramTransformations.build([], problem) return GeneticSearcher(problem=problem, resources=resources, transformations=transformations, @@ -130,7 +134,7 @@ def tournament_size(self) -> int: def initial(self) -> Population: """Generates an initial population according to this strategy.""" - pop = [] + pop: Population = [] for _ in range(self.population_size): pop.append(Candidate(self.problem, [])) return self.mutate(pop) @@ -175,12 +179,14 @@ def select(self, def mutate(self, pop: Population) -> Population: problem = self.problem - offspring = [] + offspring: Population = [] for ind in pop: child = ind if random.random() <= self.rate_mutation: mutation = self.choose_transformation() - transformations = child.transformations + (mutation,) + transformations = None + if child.transformations: + transformations = child.transformations + (mutation,) child = Candidate(problem, transformations) # type: ignore offspring.append(child) return offspring @@ -190,8 +196,8 @@ def one_point_crossover(px: Candidate, py: Candidate ) -> List[Candidate]: problem = self.problem - tx = list(px.transformations) - ty = list(py.transformations) + tx = list(px.transformations) if px.transformations else list() + ty = list(py.transformations) if py.transformations else list() lx = random.randint(0, len(tx)) ly = random.randint(0, len(ty)) diff --git a/src/darjeeling/searcher/reviewer.py b/src/darjeeling/searcher/reviewer.py new file mode 100644 index 0000000..c72c735 --- /dev/null +++ b/src/darjeeling/searcher/reviewer.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +__all__ = ('Reviewer',) + +from typing import Any, Dict, Iterable, Iterator, Optional +import typing +from typing import List + +from loguru import logger + +from .base import Searcher +from .config import SearcherConfig +from ..candidate import Candidate, DiffCandidate, DiffPatch +from ..resources import ResourceUsageTracker +from ..exceptions import SearchExhausted + +if typing.TYPE_CHECKING: + from ..problem import Problem + 
from ..transformations import ProgramTransformations + + +class ReviewerConfig(SearcherConfig): + """A configuration for reviewing patches.""" + NAME = 'reviewer' + + def __repr__(self) -> str: + return 'ReviewerConfig()' + + def __str__(self) -> str: + return repr(self) + + @classmethod + def from_dict(cls, + d: Dict[str, Any], + dir_: Optional[str] = None + ) -> 'SearcherConfig': + return ReviewerConfig() + + def build(self, + problem: 'Problem', + resources: 'ResourceUsageTracker', + candidates: 'List[DiffPatch]' = None, + *, + transformations: 'Optional[ProgramTransformations]' = None, + threads: int = 1, + run_redundant_tests: bool = False + ) -> Searcher: + if not candidates: + candidates = [] + return Reviewer(problem=problem, + resources=resources, + candidates=candidates, + threads=threads) + + +class Reviewer(Searcher): + def __init__(self, + problem: 'Problem', + resources: ResourceUsageTracker, + candidates: List[DiffPatch], + *, + threads: int = 1 + ) -> None: + # FIXME for now! 
+ self.__candidates = self.all_candidates(problem=problem, candidates=candidates) + super().__init__(problem=problem, + resources=resources, + threads=threads, + run_redundant_tests=False) + + @staticmethod + def all_candidates(problem: 'Problem', + candidates: Iterable[DiffPatch] + ) -> Iterator[Candidate]: + logger.debug(f"Obtaining all patch candidates") + for c in candidates: + logger.trace(f"Processing {repr(c)}") + print(f"Processing {repr(c)}") + yield DiffCandidate(problem, [], c) + logger.debug(f"Obtained all patch candidates") + + def _generate(self) -> Candidate: + try: + logger.debug('generating candidate patch...') + candidate = next(self.__candidates) + logger.debug(f'generated candidate patch: {candidate}') + return candidate + except StopIteration: + logger.debug('exhausted all candidate patches') + raise SearchExhausted + + def run(self) -> Iterator[Candidate]: + for _ in range(self.num_workers): + candidate = self._generate() + self.evaluate(candidate) + + for candidate, outcome in self.as_evaluated(): + if outcome.is_repair: + logger.trace(f"{repr(candidate)} PASSED additional evaluation criteria.") + print(f"{repr(candidate)} PASSED additional evaluation criteria.") + yield candidate + else: + logger.trace(f"{repr(candidate)} FAILED additional evaluation criteria.") + print(f"{repr(candidate)} FAILED additional evaluation criteria.") + self.evaluate(self._generate()) diff --git a/src/darjeeling/session.py b/src/darjeeling/session.py index eede848..391624a 100644 --- a/src/darjeeling/session.py +++ b/src/darjeeling/session.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -__all__ = ('Session',) +__all__ = ('Session', 'EvaluateSession',) -from typing import Iterator, List +from typing import Iterator, List, Optional, Set import glob import os import random @@ -12,9 +12,10 @@ from bugzoo import Bug as Snapshot from loguru import logger + from .core import Language, TestCoverageMap from .environment import Environment -from .candidate import Candidate 
+from .candidate import Candidate, DiffPatch from .resources import ResourceUsageTracker from .searcher import Searcher from .problem import Problem @@ -84,18 +85,18 @@ def from_config(environment: Environment, cfg: Config) -> 'Session': # compute coverage logger.info("computing coverage information...") - coverage = cfg.coverage.build(environment, program) + coverage = cfg.coverage.build(environment, program) if cfg.coverage else None logger.info("computed coverage information") logger.debug(f"coverage: {coverage}") # compute localization logger.info("computing fault localization...") localization = \ - Localization.from_config(coverage, cfg.localization) + Localization.from_config(coverage, cfg.localization) if coverage and cfg.localization else None logger.info(f"computed fault localization:\n{localization}") # determine implicated files - files = localization.files + files = localization.files if localization else None if program.language in (Language.CPP, Language.C): kaskara_project = kaskara.Project(dockerblade=environment.dockerblade, @@ -131,7 +132,7 @@ def from_config(environment: Environment, cfg: Config) -> 'Session': snippets = LineSnippetDatabase.for_problem(problem) logger.info(f"constructed database of donor snippets: {len(snippets)} snippets") - transformations = cfg.transformations.build(problem, snippets) + transformations = cfg.transformations.build(problem, snippets) if cfg.transformations else None searcher = cfg.search.build(problem, resources=resources, transformations=transformations, @@ -157,9 +158,12 @@ def problem(self) -> Problem: return self.searcher.problem @property - def coverage(self) -> TestCoverageMap: + def coverage(self) -> Optional[TestCoverageMap]: """The test suite coverage for the program under repair.""" - return self.problem.coverage + if self.problem.coverage: + return self.problem.coverage + else: + return None def attach_handler(self, handler: DarjeelingEventHandler) -> None: super().attach_handler(handler) @@ -228,3 
+232,160 @@ def _save_patches_to_disk(self) -> None: def __enter__(self) -> 'Session': self.run() return self + + +@attr.s +class EvaluateSession(DarjeelingEventProducer): + """Used to manage and inspect an interactive evaluation session.""" + dir_patches: str = attr.ib() + _problem: Problem = attr.ib() + searcher: Searcher = attr.ib() + resources: ResourceUsageTracker = attr.ib() + candidates: List[DiffPatch] = attr.ib(factory=list) + _general_patches: List[Candidate] = attr.ib(factory=list) + + def __attrs_post_init__(self) -> None: + DarjeelingEventProducer.__init__(self) + + @staticmethod + def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': + """Creates a new evaluation session according to a given configuration.""" + logger.debug('obtaining content from patch directory') + dir_patches = cfg.dir_patches + + if not os.path.exists(dir_patches): + print(f"Patch directory does not exist: {dir_patches}") + raise RuntimeError + + logger.warning("checking existing patch directory") + candidates: List[DiffPatch] = [] + + logger.warning("clearing existing patch directory of previously identified general-patches") + for fn in glob.glob(f'{dir_patches}/general-*.diff'): + if os.path.isfile(fn): + os.remove(fn) + for fn in glob.glob(f'{dir_patches}/*.diff'): + if os.path.isfile(fn): + logger.debug(f"Reading in {fn}") + diff = open(fn, 'r').read() + fn_name = os.path.basename(fn) + candidates.append(DiffPatch(file=fn_name, patch=Patch.from_unidiff(diff))) + + patched_files: Set[str] = set() + for p in candidates: + patched_files.add(*p.files) + + logger.debug(f"These files were patched: {patched_files}") + if len(patched_files) == 0: + print(f"Patch directory was effectively empty.") + raise RuntimeError + logger.debug('obtained content from patch directory') + + logger.info(f"using {cfg.threads} threads") + logger.info(f"using language: {cfg.program.language.value}") + + # build program + logger.debug("building program...") + program = 
cfg.program.build(environment) + + resources = ResourceUsageTracker.with_limits(cfg.resource_limits) + + # build problem for solution evaluations + problem = Problem.build_evaluation(environment=environment, + config=cfg, + language=program.language, + program=program, + patch_files=patched_files + ) + + logger.debug(f"built program: {program}") + searcher = cfg.search.build(problem, + resources=resources, + candidates=candidates, + threads=cfg.threads) + # build basic structure to evaluate solutions + evaluation = Problem.build_evaluation(environment=environment, + config=cfg, + language=program.language, + program=program, + patch_files=patched_files) + + # build session + return EvaluateSession(problem=evaluation, + searcher=searcher, + resources=resources, + candidates=candidates, + dir_patches=dir_patches + ) + + @property + def snapshot(self) -> Snapshot: + """The snapshot for the program being repaired.""" + return self.searcher.problem.bug + + @property + def problem(self) -> Problem: + """The repair problem that is being solved in this session.""" + return self.searcher.problem + + def attach_handler(self, handler: DarjeelingEventHandler) -> None: + super().attach_handler(handler) + self.searcher.attach_handler(handler) + + def remove_handler(self, handler: DarjeelingEventHandler) -> None: + super().remove_handler(handler) + self.searcher.remove_handler(handler) + + def run(self) -> None: + logger.info("beginning evaluation process...") + self._general_patches = list(self.searcher) + if not self._general_patches: + logger.info("failed to find a patch that passes evaluation tests") + + @property + def has_found_patch(self) -> bool: + """Returns :code:`True` if an acceptable patch has been found.""" + return len(self._general_patches) > 0 + + @property + def patches(self) -> Iterator[DiffPatch]: + """Returns an iterator over the patches found during this session.""" + for candidate in self._general_patches: + yield candidate.to_diff() + + def close(self) -> 
None: + """Closes the session.""" + # wait for threads to finish gracefully before exiting + self.searcher.close() + + time_running_mins = self.resources.wall_clock.duration / 60 + logger.info(f"found {len(self._general_patches)} General patches") + logger.info(f"time taken: {time_running_mins:.2f} minutes") + logger.info(f"# test evaluations: {self.resources.tests}") + logger.info(f"# candidate evaluations: {self.resources.candidates}") + + self._save_patches_to_disk() + + def pause(self) -> None: + """Pauses the session.""" + raise NotImplementedError + + def _save_patches_to_disk(self) -> None: + logger.debug("saving patches to disk...") + os.makedirs(self.dir_patches, exist_ok=True) + for i, patch in enumerate(self._general_patches): + diff = str(patch.to_diff()) + fn_patch = os.path.join(self.dir_patches, f'general-{i}.diff') + logger.debug(f"writing patch to {fn_patch}") + try: + with open(fn_patch, 'w') as f: + f.write(diff) + except OSError: + logger.exception(f"failed to write patch: {fn_patch}") + raise + logger.debug(f"wrote patch to {fn_patch}") + logger.debug("saved patches to disk") + + def __enter__(self) -> 'EvaluateSession': + self.run() + return self diff --git a/src/darjeeling/test/shell.py b/src/darjeeling/test/shell.py index 65bc3fe..dc9a507 100644 --- a/src/darjeeling/test/shell.py +++ b/src/darjeeling/test/shell.py @@ -111,10 +111,10 @@ def execute( cwd=self._workdir, time_limit=self._time_limit_seconds, environment=environment, - # 12/7 note from pdr - not all runs seem to return str compatible + # 12/7 note from pdr - not all runs seem to return str compatible # with default decoding in dockerblade # outcome.output does not look like it's used at all - #text=True, + # text=True, text=False, ) logger.trace(f"shell test outcome: {outcome}") diff --git a/src/darjeeling/transformation/base.py b/src/darjeeling/transformation/base.py index f608264..772f5ef 100644 --- a/src/darjeeling/transformation/base.py +++ 
b/src/darjeeling/transformation/base.py @@ -91,5 +91,6 @@ def find_all_at_lines(self, def find_all(self, problem: 'Problem') -> Iterator[Transformation]: """Finds all transformations using this schema for a given problem.""" - implicated_lines = list(problem.localization) - yield from self.find_all_at_lines(implicated_lines) + if problem.localization: + implicated_lines = list(problem.localization) + yield from self.find_all_at_lines(implicated_lines)