From ad084afd3aa9759c83477ae1e07b617ac9d9ba6f Mon Sep 17 00:00:00 2001 From: pdreiter Date: Thu, 6 Apr 2023 17:31:40 -0700 Subject: [PATCH 1/5] reimplementation of heldout-test evaluation structure based on @ChrisTimperley's feedback issue #300 - tested, but seeing some issues with test timeouts --- src/darjeeling/candidate.py | 50 ++++++++- src/darjeeling/cli/__init__.py | 58 ++++++++++- src/darjeeling/config.py | 86 +++++++++++++++- src/darjeeling/evaluator.py | 12 +-- src/darjeeling/problem.py | 47 +++++++++ src/darjeeling/program.py | 17 +-- src/darjeeling/resources.py | 8 +- src/darjeeling/searcher/__init__.py | 1 + src/darjeeling/searcher/reviewer.py | 97 ++++++++++++++++++ src/darjeeling/session.py | 154 +++++++++++++++++++++++++++- 10 files changed, 511 insertions(+), 19 deletions(-) create mode 100644 src/darjeeling/searcher/reviewer.py diff --git a/src/darjeeling/candidate.py b/src/darjeeling/candidate.py index 669a8e31..6ce55a2e 100644 --- a/src/darjeeling/candidate.py +++ b/src/darjeeling/candidate.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__all__ = ('Candidate',) +__all__ = ('Candidate', 'DiffCandidate',) from typing import Dict, List, Tuple import typing @@ -50,3 +50,51 @@ def id(self) -> str: def __repr__(self) -> str: return "Candidate<#{}>".format(self.id) + +@attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) +class DiffPatch: + _file: str = attr.ib() + _patch: Patch = attr.ib(factory=Patch) + + def to_diff(self) -> Patch: + return self._patch + + @property + def files(self) -> List[str]: + return self._patch.files + + @property + def file_name(self) -> str: + return self._file + + def __repr__(self) -> str: + return "DiffPatch<{}>".format(self.file_name) + +@attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) +class DiffCandidate: + """Represents a repair as a set of atomic program transformations.""" + problem: 'Problem' = attr.ib(hash=False, eq=False) + patch: DiffPatch = attr.ib(factory=DiffPatch) + + def lines_changed(self) -> List[FileLine]: + locs: List[FileLine] = [] + lines = [(f.old_fn,l) for f in patch.__file_patches for h in f.__hunks for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))] + for f,l in lines: + locs.append(FileLine(f,l)) + return locs + + def to_diff(self) -> Patch: + return self.patch.to_diff() + + @property + def file(self) -> str: + return self.patch.file_name + + @property + def id(self) -> str: + """An eight-character hexadecimal identifier for this candidate.""" + hex_hash = hex(abs(hash(self))) + return hex_hash[2:10] + + def __repr__(self) -> str: + return "DiffCandidate<{}#{}>".format(self.file,self.id) \ No newline at end of file diff --git a/src/darjeeling/cli/__init__.py b/src/darjeeling/cli/__init__.py index 1794f6f5..844e519f 100644 --- a/src/darjeeling/cli/__init__.py +++ b/src/darjeeling/cli/__init__.py @@ -16,11 +16,11 @@ from ..environment import Environment from ..problem import Problem from ..version import __version__ as VERSION -from ..config import Config +from ..config import Config, EvaluateConfig from ..events import CsvEventLogger, WebSocketEventHandler from ..plugins import LOADED_PLUGINS from ..resources import ResourceUsageTracker -from ..session import Session +from ..session import Session, EvaluateSession from ..exceptions import BadConfigurationException from ..util import duration_str @@ -301,6 +301,60 @@ def repair(self) -> bool: else: sys.exit(1) + @cement.ex( + help='evaluate a repair specified by patch using additional criteria', + arguments=[ + (['filename'], + {'help': ('a Darjeeling configuration file describing a faulty ' + 'program and how it should be repaired.')}), + (['--patch-dir'], + {'help': 'path containing patches to restore and evaluate.', + 'dest': 'dir_patches', + 'type': str}), + (['--log-to-file'], + {'help': 'path to store the log file.', + 'type': str}), + (['--threads'], + {'dest': 'threads', + 'type': int, + 'help': ('number of threads over which the repair workload ' + 'should be distributed')}) + ] + ) + def evaluate(self) -> None: + """Evaluates a given program.""" + # load the configuration file + filename = self.app.pargs.filename + filename = os.path.abspath(filename) + cfg_dir = os.path.dirname(filename) + dir_patches: Optional[str] = self.app.pargs.dir_patches + threads: Optional[int] = self.app.pargs.threads + log_to_filename: Optional[str] = self.app.pargs.log_to_file + with open(filename, 'r') as f: + yml = yaml.safe_load(f) + + logger.info(f'logging to file: {log_to_filename}') + logger.add(log_to_filename, level='TRACE') + cfg = EvaluateConfig.from_yml(yml=yml, + dir_=cfg_dir, + dir_patches=dir_patches, + threads=threads) + + with bugzoo.server.ephemeral(timeout_connection=120) as client_bugzoo: + environment = Environment(bugzoo=client_bugzoo) + try: + session = EvaluateSession.from_config(environment, cfg) + except BadConfigurationException: + print("ERROR: bad configuration file") + sys.exit(1) + + session.run() + session.close() + + + + + class CLI(cement.App): class Meta: diff --git a/src/darjeeling/config.py b/src/darjeeling/config.py index 904b3832..025c4c93 100644 --- a/src/darjeeling/config.py +++ b/src/darjeeling/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __all__ = ('Config', 'OptimizationsConfig', 'CoverageConfig', - 'LocalizationConfig') + 'LocalizationConfig', 'EvaluateConfig') from typing import Any, Collection, Dict, List, NoReturn, Optional, Set import datetime @@ -279,3 +279,87 @@ def err(m: str) -> NoReturn: search=search, optimizations=opts, dir_patches=dir_patches) + + +@attr.s(frozen=True, auto_attribs=True) +class EvaluateConfig: + """A configuration for Darjeeling to evaluate patches with additional content. + + Attributes + ---------- + dir_patches: str + The absolute path to the directory to which patches are saved. + seed: int + The seed that should be used by the random number generator. + threads: int + The number of threads over which the search should be distributed. + program: ProgramDescriptionConfig + A description of the program under transformation. + """ + search: SearcherConfig + program: ProgramDescriptionConfig + dir_patches: str = attr.ib() + threads: int = attr.ib(default=1) + + @dir_patches.validator + def validate_patches(self, attribute, value): + if not os.path.isabs(value): + m = "patch directory should be an absolute path." + raise BadConfigurationException(m) + + @threads.validator + def validate_threads(self, attribute, value): + if value < 1: + m = "number of threads must be greater than or equal to 1." + raise BadConfigurationException(m) + + @staticmethod + def from_yml(yml: Dict[str, Any], + dir_: Optional[str] = None, + *, + threads: Optional[int] = None, + dir_patches: Optional[str] = None + ) -> 'EvaluateConfig': + """Loads a configuration from a YAML dictionary. + + Raises + ------ + BadConfigurationException + If an illegal configuration is provided. + """ + def err(m: str) -> NoReturn: + raise BadConfigurationException(m) + + if dir_patches is None and 'save-patches-to' in yml: + dir_patches = yml['save-patches-to'] + if not isinstance(dir_patches, str): + err("'save-patches-to' property should be a string") + if not os.path.isabs(dir_patches): + if not dir_: + err("'save-patches-to' must be absolute for non-file-based configurations") + dir_patches = os.path.join(dir_, dir_patches) + elif dir_patches is None: + if not dir_: + err("'save-patches-to' must be specified for non-file-based configurations") + dir_patches = os.path.join(dir_, 'patches') + + if threads is None and 'threads' in yml: + if not isinstance(yml['threads'], int): + err("'threads' property should be an int") + threads = yml['threads'] + elif threads is None: + threads = 1 + + + if 'program' not in yml: + err("'program' section is missing") + program = ProgramDescriptionConfig.from_dict(dict_=yml['program'], dir_=dir_, heldout=True) + + search = SearcherConfig.from_dict({'type':'reviewer'}, dir_) + + return EvaluateConfig( + threads=threads, + program=program, + search=search, + dir_patches=dir_patches) + diff --git a/src/darjeeling/evaluator.py b/src/darjeeling/evaluator.py index 2a85faca..60f46d04 100644 --- a/src/darjeeling/evaluator.py +++ b/src/darjeeling/evaluator.py @@ -18,7 +18,7 @@ from loguru import logger from . import exceptions as exc -from .candidate import Candidate +from .candidate import (Candidate, DiffCandidate) from .container import ProgramContainer from .outcome import (BuildOutcome, CandidateOutcome, CandidateOutcomeStore, TestOutcome, TestOutcomeSet) @@ -36,7 +36,7 @@ if typing.TYPE_CHECKING: from .problem import Problem -Evaluation = Tuple[Candidate, CandidateOutcome] +Evaluation = Tuple[Union[ Candidate, DiffCandidate ], CandidateOutcome] class Evaluator(DarjeelingEventProducer): @@ -137,7 +137,7 @@ def _filter_redundant_tests(self, def _run_test(self, container: ProgramContainer, - candidate: Candidate, + candidate: Union[ Candidate, DiffCandidate ], test: Test ) -> TestOutcome: """Runs a test for a given patch using a provided container.""" @@ -168,7 +168,7 @@ def _run_test(self, self.dispatch(TestExecutionFinished(candidate, test, outcome)) return outcome - def _evaluate(self, candidate: Candidate) -> CandidateOutcome: + def _evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> CandidateOutcome: outcomes = self.__outcomes patch = candidate.to_diff() logger.info(f"evaluating candidate: {candidate}\n{patch}\n") @@ -267,7 +267,7 @@ def _evaluate(self, candidate: Candidate) -> CandidateOutcome: finally: logger.info(f"evaluated candidate: {candidate}") - def evaluate(self, candidate: Candidate) -> Evaluation: + def evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> Evaluation: """Evaluates a given candidate patch.""" outcomes = self.__outcomes self.dispatch(CandidateEvaluationStarted(candidate)) @@ -288,7 +288,7 @@ def evaluate(self, candidate: Candidate) -> Evaluation: self.__num_running -= 1 return (candidate, outcome) - def submit(self, candidate: Candidate) -> 'Future[Evaluation]': + def submit(self, candidate: Union[ Candidate, DiffCandidate ]) -> 'Future[Evaluation]': """Schedules a candidate patch evaluation.""" with self.__lock: self.__num_running += 1 diff --git a/src/darjeeling/problem.py b/src/darjeeling/problem.py index d4516baf..aea46f3b 100644 --- a/src/darjeeling/problem.py +++ b/src/darjeeling/problem.py @@ -14,6 +14,7 @@ from .source import ProgramSource, ProgramSourceLoader from .exceptions import NoFailingTests, NoImplicatedLines + if typing.TYPE_CHECKING: from .config import Config, OptimizationsConfig from .core import Language, TestCoverageMap @@ -145,6 +146,51 @@ def ordering(x: Test, y: Test) -> int: problem.validate() return problem + @staticmethod + def build_evaluation(environment: 'Environment', + config: 'EvaluateConfig', + language: 'Language', + program: 'ProgramDescription', + *, + patch_files: set, + ) -> 'Problem': + """Constructs a Problem description based on Patch file for evaluation only. + + Raises + ------- + """ + + passing_tests: Sequence[Test] = \ + tuple( sorted(program.tests) ) + + failing_tests: Sequence[Test] = tuple() + + logger.info("ordering test cases") + test_ordering: Sequence[Test] = \ + tuple(sorted(program.tests)) + logger.info('test order: {}', ', '.join(t.name for t in test_ordering)) + + logger.debug("storing contents of source code files") + source_files = set(patch_files) + source_loader = ProgramSourceLoader(environment) + sources = source_loader.for_program(program, files=source_files) + logger.debug("stored contents of source code files") + + solution = Problem(environment=environment, + program=program, + language=language, + sources=sources, + config=config, + passing_tests=passing_tests, + failing_tests=failing_tests, + test_ordering=test_ordering, + analysis=None, + coverage=None, + localization=None + ) + + return solution + def validate(self) -> None: """ Ensures that this repair problem is valid. To be considered valid, a @@ -188,3 +234,4 @@ def lines(self) -> Iterator[FileLine]: @property def implicated_files(self) -> Iterator[str]: yield from set(location.filename for location in self.coverage.failing.locations) + diff --git a/src/darjeeling/program.py b/src/darjeeling/program.py index 328b6ca5..d1017267 100644 --- a/src/darjeeling/program.py +++ b/src/darjeeling/program.py @@ -35,7 +35,8 @@ class ProgramDescriptionConfig: @staticmethod def from_dict(dict_: Mapping[str, Any], - dir_: Optional[str] = None + dir_: Optional[str] = None, + heldout : Optional[bool] = False ) -> 'ProgramDescriptionConfig': def err(message: str) -> NoReturn: raise exc.BadConfigurationException(message) @@ -81,11 +82,15 @@ def err(message: str) -> NoReturn: err(f"unsupported language [{dict_['language']}]. {supported}") # test suite - if 'tests' not in dict_: - err("'tests' section is missing from 'program' section") - if not isinstance(dict_['tests'], dict): - err("'tests' section should be an object") - tests = TestSuiteConfig.from_dict(dict_.get('tests', {}), dir_) + # populate with 'heldout-tests' content only when specified + # 'tests' is default behavior + tests_key = 'tests' if not heldout else 'heldout-tests' + + if tests_key not in dict_: + err(f"'{tests_key} section is missing from 'program' section") + if not isinstance(dict_[tests_key], dict): + err(f"'{tests_key}' section should be an object") + tests = TestSuiteConfig.from_dict(dict_.get(tests_key, {}), dir_) # build instructions if 'build-instructions' not in dict_: diff --git a/src/darjeeling/resources.py b/src/darjeeling/resources.py index 8a78ed63..151ab85b 100644 --- a/src/darjeeling/resources.py +++ b/src/darjeeling/resources.py @@ -37,6 +37,11 @@ def with_limits(limits: 'ResourceLimits') -> 'ResourceUsageTracker': """Constructs a new tracker with given resource limits.""" return ResourceUsageTracker(limits=limits) + @staticmethod + def no_limits() -> 'ResourceUsageTracker': + """Constructs a new tracker with given resource limits.""" + return ResourceUsageTracker(limits=None) + def check_limits(self) -> None: """Checks whether the resource limit has been reached, and if so, throws an exception. @@ -46,7 +51,8 @@ def check_limits(self) -> None: ResourceLimitReached If a resource limit has been reached. """ - self.limits.check(self) + if self.limits: + self.limits.check(self) class ResourceLimit(abc.ABC): diff --git a/src/darjeeling/searcher/__init__.py b/src/darjeeling/searcher/__init__.py index 6b9dbe1b..df2f78f1 100644 --- a/src/darjeeling/searcher/__init__.py +++ b/src/darjeeling/searcher/__init__.py @@ -3,3 +3,4 @@ from .base import Searcher from .exhaustive import ExhaustiveSearcher from .genetic import GeneticSearcher +from .reviewer import Reviewer \ No newline at end of file diff --git a/src/darjeeling/searcher/reviewer.py b/src/darjeeling/searcher/reviewer.py new file mode 100644 index 00000000..df58586e --- /dev/null +++ b/src/darjeeling/searcher/reviewer.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +__all__ = ('Reviewer',) + +from typing import Any, Dict, Iterable, Iterator, Optional +import typing +from typing import List + +from loguru import logger + +from .base import Searcher +from .config import SearcherConfig +from ..candidate import DiffCandidate +from ..resources import ResourceUsageTracker +from ..transformation import Transformation +from ..exceptions import SearchExhausted +from bugzoo.core.patch import Patch + +if typing.TYPE_CHECKING: + from ..problem import Problem + from ..transformations import ProgramTransformations + + +class ReviewerConfig(SearcherConfig): + """A configuration for reviewing patches.""" + NAME = 'reviewer' + + def __repr__(self) -> str: + return 'ReviewerConfig()' + + def __str__(self) -> str: + return repr(self) + + @classmethod + def from_dict(cls, + d: Dict[str, Any], + dir_: Optional[str] = None + ) -> 'SearcherConfig': + return ReviewerConfig() + + def build(self, + problem: 'Problem', + resources: ResourceUsageTracker, + candidates: List[Patch], + *, + threads: int = 1 + ) -> Searcher: + return Reviewer(problem=problem, + resources=resources, + candidates=candidates, + threads=threads) + + +class Reviewer(Searcher): + def __init__(self, + problem: 'Problem', + resources: ResourceUsageTracker, + candidates: List[Patch], + *, + threads: int = 1 + ) -> None: + # FIXME for now! + self.__candidates = self.all_candidates(problem=problem, candidates=candidates) + super().__init__(problem=problem, + resources=resources, + threads=threads, + run_redundant_tests=False) + + @staticmethod + def all_candidates(problem: 'Problem', + candidates: Iterable[DiffCandidate] + ) -> Iterator[DiffCandidate]: + logger.debug(f"Obtaining all patch candidates") + for c in candidates: + logger.trace(f"Processing {repr(c)}") + print(f"Processing {repr(c)}") + yield DiffCandidate(problem,c) + logger.debug(f"Obtained all patch candidates") + + def _generate(self) -> DiffCandidate: + try: + logger.debug('generating candidate patch...') + candidate = next(self.__candidates) + logger.debug(f'generated candidate patch: {candidate}') + return candidate + except StopIteration: + logger.debug('exhausted all candidate patches') + raise SearchExhausted + + def run(self) -> Iterator[DiffCandidate]: + for _ in range(self.num_workers): + candidate = self._generate() + self.evaluate(candidate) + + for candidate, outcome in self.as_evaluated(): + if outcome.is_repair: + yield candidate + self.evaluate(self._generate()) diff --git a/src/darjeeling/session.py b/src/darjeeling/session.py index eede8488..af24d7f3 100644 --- a/src/darjeeling/session.py +++ b/src/darjeeling/session.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__all__ = ('Session',) +__all__ = ('Session','EvaluateSession',) from typing import Iterator, List import glob @@ -12,9 +12,10 @@ from bugzoo import Bug as Snapshot from loguru import logger + from .core import Language, TestCoverageMap from .environment import Environment -from .candidate import Candidate +from .candidate import Candidate, DiffPatch from .resources import ResourceUsageTracker from .searcher import Searcher from .problem import Problem @@ -228,3 +229,152 @@ def _save_patches_to_disk(self) -> None: def __enter__(self) -> 'Session': self.run() return self + + +@attr.s +class EvaluateSession(DarjeelingEventProducer): + """Used to manage and inspect an interactive evaluation session.""" + _problem: Problem = attr.ib() + searcher: Searcher = attr.ib() + candidates: List[DiffPatch] = attr.ib(factory=list) + _general_patches: List[DiffPatch] = attr.ib(factory=list) + + def __attrs_post_init__(self) -> None: + DarjeelingEventProducer.__init__(self) + + @staticmethod + def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': + """Creates a new evaluation session according to a given configuration.""" + logger.debug('obtaining content from patch directory') + dir_patches = cfg.dir_patches + + if not os.path.exists(dir_patches): + print(f"Patch directory does not exist: {dir_patches}") + raise RuntimeError + + logger.warning("checking existing patch directory") + candidates:List[DiffPatch] = [] + for fn in glob.glob(f'{dir_patches}/*.diff'): + if os.path.isfile(fn): + logger.debug(f"Reading in {fn}") + diff = open(fn, 'r').read() + fn_name=os.path.basename(fn) + candidates.append(DiffPatch(file=fn_name,patch=Patch.from_unidiff(diff))) + + patched_files = set() + for p in candidates: + patched_files.add(*p.files) + + logger.debug(f"These files were patched: {patched_files}") + if len(patched_files)==0: + print(f"Patch directory was effectively empty.") + raise RuntimeError + logger.debug('obtained content from patch directory') + + logger.info(f"using {cfg.threads} threads") + logger.info(f"using language: {cfg.program.language.value}") + + # build program + logger.debug("building program...") + program = cfg.program.build(environment) + + resources = ResourceUsageTracker.no_limits() + + # build problem for solution evaluations + problem = Problem.build_evaluation(environment=environment, + config=cfg, + language=program.language, + program=program, + patch_files=patched_files + ) + + logger.debug(f"built program: {program}") + searcher = cfg.search.build(problem, + resources=resources, + candidates=candidates, + threads=cfg.threads) + # build basic structure to evaluate solutions + evaluation = Problem.build_evaluation(environment=environment, + config=cfg, + language=program.language, + program=program, + patch_files=patched_files) + + # build session + return EvaluateSession( + problem=evaluation, + searcher=searcher, + general_patches=candidates + ) + + @property + def snapshot(self) -> Snapshot: + """The snapshot for the program being repaired.""" + return self.searcher.problem.bug + + @property + def problem(self) -> Problem: + """The repair problem that is being solved in this session.""" + return self.searcher.problem + + def attach_handler(self, handler: DarjeelingEventHandler) -> None: + super().attach_handler(handler) + self.searcher.attach_handler(handler) + + def remove_handler(self, handler: DarjeelingEventHandler) -> None: + super().remove_handler(handler) + self.searcher.remove_handler(handler) + + def run(self) -> None: + logger.info("beginning evaluation process...") + self._general_patches = list(self.searcher) + if not self._general_patches: + logger.info("failed to find a patch that passes evaluation tests") + + @property + def has_found_patch(self) -> bool: + """Returns :code:`True` if an acceptable patch has been found.""" + return len(self._general_patches) > 0 + + @property + def patches(self) -> Iterator[DiffPatch]: + """Returns an iterator over the patches found during this session.""" + for candidate in self._general_patches: + yield candidate.to_diff() + + def close(self) -> None: + """Closes the session.""" + # wait for threads to finish gracefully before exiting + self.searcher.close() + + time_running_mins = self.resources.wall_clock.duration / 60 + logger.info(f"found {len(self._general_patches)} General patches") + logger.info(f"time taken: {time_running_mins:.2f} minutes") + logger.info(f"# test evaluations: {self.resources.tests}") + logger.info(f"# candidate evaluations: {self.resources.candidates}") + + self._save_patches_to_disk() + + def pause(self) -> None: + """Pauses the session.""" + raise NotImplementedError + + def _save_patches_to_disk(self) -> None: + logger.debug("saving patches to disk...") + os.makedirs(self.dir_patches, exist_ok=True) + for i, patch in enumerate(self._general_patches): + diff = str(patch.to_diff()) + fn_patch = os.path.join(self.dir_patches, f'general-{i}.diff') + logger.debug(f"writing patch to {fn_patch}") + try: + with open(fn_patch, 'w') as f: + f.write(diff) + except OSError: + logger.exception(f"failed to write patch: {fn_patch}") + raise + logger.debug(f"wrote patch to {fn_patch}") + logger.debug("saved patches to disk") + + def __enter__(self) -> 'EvaluateSession': + self.run() + return self \ No newline at end of file From 075e6443c2c0953b970d551003714cfda75c2b19 Mon Sep 17 00:00:00 2001 From: pdreiter Date: Fri, 7 Apr 2023 15:01:48 -0700 Subject: [PATCH 2/5] evaluation infrastructure fully tested and ready for PR --- src/darjeeling/candidate.py | 30 ++++++++++++++++++++++------- src/darjeeling/cli/__init__.py | 6 ++++++ src/darjeeling/config.py | 14 +++++++++++++- src/darjeeling/evaluator.py | 6 ++++-- src/darjeeling/searcher/reviewer.py | 5 +++++ src/darjeeling/session.py | 8 ++++++-- 6 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/darjeeling/candidate.py b/src/darjeeling/candidate.py index 6ce55a2e..67fd238f 100644 --- a/src/darjeeling/candidate.py +++ b/src/darjeeling/candidate.py @@ -7,7 +7,7 @@ from bugzoo.core.patch import Patch import attr -from .core import Replacement, FileLine +from .core import ( Replacement, FileLine ) from .transformation import Transformation from .util import tuple_from_iterable @@ -63,6 +63,10 @@ def to_diff(self) -> Patch: def files(self) -> List[str]: return self._patch.files + @property + def patch(self) -> Patch: + return self._patch + @property def file_name(self) -> str: return self._file @@ -74,21 +78,33 @@ def __repr__(self) -> str: class DiffCandidate: """Represents a repair as a set of atomic program transformations.""" problem: 'Problem' = attr.ib(hash=False, eq=False) - patch: DiffPatch = attr.ib(factory=DiffPatch) + _diffpatch: DiffPatch = attr.ib(factory=DiffPatch) def lines_changed(self) -> List[FileLine]: locs: List[FileLine] = [] - lines = [(f.old_fn,l) for f in patch.__file_patches for h in f.__hunks for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))] - for f,l in lines: - locs.append(FileLine(f,l)) + # no accessibility to bugzoo.core.patch subcontent + #lines = [(f.old_fn,l) for f in self.get_file_patches() for h in f.__hunks for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))] + #for f,l in lines: + # locs.append(FileLine(f,l)) return locs def to_diff(self) -> Patch: - return self.patch.to_diff() + return self._diffpatch.to_diff() + + def get_file_patches(self): + return self._diffpatch._patch.__file_patches + + @property + def diffpatch(self) -> DiffPatch: + return self._diffpatch + + @property + def patch(self) -> Patch: + return self._diffpatch.patch @property def file(self) -> str: - return self.patch.file_name + return self._diffpatch.file_name @property def id(self) -> str: diff --git a/src/darjeeling/cli/__init__.py b/src/darjeeling/cli/__init__.py index 844e519f..30bfe853 100644 --- a/src/darjeeling/cli/__init__.py +++ b/src/darjeeling/cli/__init__.py @@ -330,6 +330,12 @@ def evaluate(self) -> None: dir_patches: Optional[str] = self.app.pargs.dir_patches threads: Optional[int] = self.app.pargs.threads log_to_filename: Optional[str] = self.app.pargs.log_to_file + + logger.remove() + logger.enable('darjeeling') + for plugin_name in LOADED_PLUGINS: + logger.enable(plugin_name) + with open(filename, 'r') as f: yml = yaml.safe_load(f) diff --git a/src/darjeeling/config.py b/src/darjeeling/config.py index 025c4c93..a6af379d 100644 --- a/src/darjeeling/config.py +++ b/src/darjeeling/config.py @@ -295,9 +295,13 @@ class EvaluateConfig: The number of threads over which the search should be distributed. program: ProgramDescriptionConfig A description of the program under transformation. + resource_limits: ResourceLimits + Limits on the resources that may be consumed during the search. + """ search: SearcherConfig program: ProgramDescriptionConfig + resource_limits: ResourceLimits dir_patches: str = attr.ib() threads: int = attr.ib(default=1) @@ -350,6 +354,13 @@ def err(m: str) -> NoReturn: elif threads is None: threads = 1 + # resource limits + yml.setdefault('resource-limits', {}) + + resource_limits = \ + ResourceLimits.from_dict(yml['resource-limits'], dir_) + + if 'program' not in yml: err("'program' section is missing") @@ -361,5 +372,6 @@ def err(m: str) -> NoReturn: threads=threads, program=program, search=search, - dir_patches=dir_patches) + dir_patches=dir_patches, + resource_limits=resource_limits) diff --git a/src/darjeeling/evaluator.py b/src/darjeeling/evaluator.py index 60f46d04..a859ad22 100644 --- a/src/darjeeling/evaluator.py +++ b/src/darjeeling/evaluator.py @@ -161,10 +161,12 @@ def _run_test(self, outcome = TestOutcome(successful=False, time_taken=timer.duration) + id_=" heldout" if isinstance(candidate,DiffCandidate) else "" + if not outcome.successful: - logger.debug(f"* test failed: {test.name} ({candidate})") + logger.debug(f"*{id_} test failed: {test.name} ({candidate})") else: - logger.debug(f"* test passed: {test.name} ({candidate})") + logger.debug(f"*{id_} test passed: {test.name} ({candidate})") self.dispatch(TestExecutionFinished(candidate, test, outcome)) return outcome diff --git a/src/darjeeling/searcher/reviewer.py b/src/darjeeling/searcher/reviewer.py index df58586e..f7f206df 100644 --- a/src/darjeeling/searcher/reviewer.py +++ b/src/darjeeling/searcher/reviewer.py @@ -93,5 +93,10 @@ def run(self) -> Iterator[DiffCandidate]: for candidate, outcome in self.as_evaluated(): if outcome.is_repair: + logger.trace(f"{repr(candidate)} PASSED additional evaluation criteria.") + print(f"{repr(candidate)} PASSED additional evaluation criteria.") yield candidate + else: + logger.trace(f"{repr(candidate)} FAILED additional evaluation criteria.") + print(f"{repr(candidate)} FAILED additional evaluation criteria.") self.evaluate(self._generate()) diff --git a/src/darjeeling/session.py b/src/darjeeling/session.py index af24d7f3..70c0c361 100644 --- a/src/darjeeling/session.py +++ b/src/darjeeling/session.py @@ -234,8 +234,10 @@ def __enter__(self) -> 'Session': @attr.s class EvaluateSession(DarjeelingEventProducer): """Used to manage and inspect an interactive evaluation session.""" + dir_patches: str = attr.ib() _problem: Problem = attr.ib() searcher: Searcher = attr.ib() + resources: ResourceUsageTracker = attr.ib() candidates: List[DiffPatch] = attr.ib(factory=list) _general_patches: List[DiffPatch] = attr.ib(factory=list) @@ -278,7 +280,7 @@ def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': logger.debug("building program...") program = cfg.program.build(environment) - resources = ResourceUsageTracker.no_limits() + resources = ResourceUsageTracker.with_limits(cfg.resource_limits) # build problem for solution evaluations problem = Problem.build_evaluation(environment=environment, @@ -304,7 +306,9 @@ def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': return EvaluateSession( problem=evaluation, searcher=searcher, - general_patches=candidates + resources=resources, + candidates=candidates, + dir_patches=dir_patches ) @property From 6686bdeb663a4739caa489d3fcf45a7046337a68 Mon Sep 17 00:00:00 2001 From: pdreiter Date: Fri, 7 Apr 2023 19:43:37 -0700 Subject: [PATCH 3/5] tox-specific fixes (tested) --- src/darjeeling/candidate.py | 39 +++++++----- src/darjeeling/cli/__init__.py | 28 ++++++--- src/darjeeling/config.py | 60 +++++++++--------- src/darjeeling/coverage/config.py | 2 +- src/darjeeling/coverage/gcov.py | 14 ++--- src/darjeeling/evaluator.py | 46 +++++++------- src/darjeeling/events/event.py | 1 + src/darjeeling/localization.py | 2 +- src/darjeeling/problem.py | 87 ++++++++++++++++++--------- src/darjeeling/program.py | 4 +- src/darjeeling/resources.py | 5 -- src/darjeeling/searcher/__init__.py | 2 +- src/darjeeling/searcher/config.py | 6 +- src/darjeeling/searcher/exhaustive.py | 14 +++-- src/darjeeling/searcher/genetic.py | 24 +++++--- src/darjeeling/searcher/reviewer.py | 32 +++++----- src/darjeeling/session.py | 71 ++++++++++++---------- src/darjeeling/test/shell.py | 4 +- src/darjeeling/transformation/base.py | 5 +- 19 files changed, 253 insertions(+), 193 deletions(-) diff --git a/src/darjeeling/candidate.py b/src/darjeeling/candidate.py index 67fd238f..7760f905 100644 --- a/src/darjeeling/candidate.py +++ b/src/darjeeling/candidate.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- -__all__ = ('Candidate', 'DiffCandidate',) +__all__ = ('Candidate', 'DiffCandidate') -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional import typing from bugzoo.core.patch import Patch import attr -from .core import ( Replacement, FileLine ) +from .core import (Replacement, FileLine) from .transformation import Transformation from .util import tuple_from_iterable @@ -19,13 +19,13 @@ class Candidate: """Represents a repair as a set of atomic program transformations.""" problem: 'Problem' = attr.ib(hash=False, eq=False) - transformations: Tuple[Transformation, ...] = \ + transformations: Optional[Tuple[Transformation, ...]] = \ attr.ib(converter=tuple_from_iterable) def to_diff(self) -> Patch: """Transforms this candidate patch into a concrete, unified diff.""" replacements = \ - map(lambda t: t.to_replacement(), self.transformations) + map(lambda t: t.to_replacement(), self.transformations) if self.transformations else {} replacements_by_file: Dict[str, List[Replacement]] = {} for rep in replacements: fn = rep.location.filename @@ -40,7 +40,10 @@ def lines_changed(self) -> List[FileLine]: Returns a list of source lines that are changed by this candidate patch. """ - return [t.line for t in self.transformations] + if self.transformations: + return [t.line for t in self.transformations] + else: + return [] @property def id(self) -> str: @@ -51,11 +54,12 @@ def id(self) -> str: def __repr__(self) -> str: return "Candidate<#{}>".format(self.id) + @attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) class DiffPatch: _file: str = attr.ib() _patch: Patch = attr.ib(factory=Patch) - + def to_diff(self) -> Patch: return self._patch @@ -66,26 +70,29 @@ def files(self) -> List[str]: @property def patch(self) -> Patch: return self._patch - + @property def file_name(self) -> str: return self._file - + def __repr__(self) -> str: return "DiffPatch<{}>".format(self.file_name) + @attr.s(frozen=True, repr=False, slots=True, auto_attribs=True) -class DiffCandidate: +class DiffCandidate(Candidate): """Represents a repair as a set of atomic program transformations.""" - problem: 'Problem' = attr.ib(hash=False, eq=False) - _diffpatch: DiffPatch = attr.ib(factory=DiffPatch) + _diffpatch: DiffPatch = attr.ib() def lines_changed(self) -> List[FileLine]: locs: List[FileLine] = [] # no accessibility to bugzoo.core.patch subcontent - #lines = [(f.old_fn,l) for f in self.get_file_patches() for h in f.__hunks for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))] - #for f,l in lines: - # locs.append(FileLine(f,l)) + # lines = [(f.old_fn,l) for f in self.get_file_patches()\ + # for h in f.__hunks \ + # for l in range(h.__old_start_at,h.__old_start_at+len(h.__lines))\ + # ] + # for f,l in lines: + # locs.append(FileLine(f,l)) return locs def to_diff(self) -> Patch: @@ -113,4 +120,4 @@ def id(self) -> str: return hex_hash[2:10] def __repr__(self) -> str: - return "DiffCandidate<{}#{}>".format(self.file,self.id) \ No newline at end of file + return "DiffCandidate<{}#{}>".format(self.file, self.id) diff --git a/src/darjeeling/cli/__init__.py b/src/darjeeling/cli/__init__.py index 30bfe853..7d0fff02 100644 --- a/src/darjeeling/cli/__init__.py +++ b/src/darjeeling/cli/__init__.py @@ -96,6 +96,18 @@ def _default_log_filename(self) -> str: num = max(used_numbers) + 1 return os.path.join(os.getcwd(), 'darjeeling.log.{}'.format(num)) + @property + def _default_eval_log_filename(self) -> str: + # find all log file numbers that have been used in this directory + used_numbers = [int(s.rpartition('.')[-1]) + for s in glob.glob('evaluation.log.[0-9]*')] + + if not used_numbers: + return os.path.join(os.getcwd(), 'evaluation.log.0') + + num = max(used_numbers) + 1 + return os.path.join(os.getcwd(), 'evaluation.log.{}'.format(num)) + @cement.ex( help='generates a test suite coverage report for a given problem', arguments=[ @@ -241,7 +253,7 @@ def repair(self) -> bool: # setup logging to file if should_log_to_file: if not log_to_filename: - log_to_filename = self._default_log_filename + log_to_filename = self._default_eval_log_filename logger.info(f'logging to file: {log_to_filename}') logger.add(log_to_filename, level='TRACE') @@ -339,12 +351,14 @@ def evaluate(self) -> None: with open(filename, 'r') as f: yml = yaml.safe_load(f) + if not log_to_filename: + log_to_filename = self._default_log_filename logger.info(f'logging to file: {log_to_filename}') logger.add(log_to_filename, level='TRACE') - cfg = EvaluateConfig.from_yml(yml=yml, - dir_=cfg_dir, - dir_patches=dir_patches, - threads=threads) + cfg = EvaluateConfig.from_yml(yml=yml, + dir_=cfg_dir, + dir_patches=dir_patches, + threads=threads) with bugzoo.server.ephemeral(timeout_connection=120) as client_bugzoo: environment = Environment(bugzoo=client_bugzoo) @@ -358,10 +372,6 @@ def evaluate(self) -> None: session.close() - - - - class CLI(cement.App): class Meta: label = 'darjeeling' diff --git a/src/darjeeling/config.py b/src/darjeeling/config.py index a6af379d..b435e50a 100644 --- a/src/darjeeling/config.py +++ b/src/darjeeling/config.py @@ -138,10 +138,10 @@ class Config: """ dir_patches: str = attr.ib() program: ProgramDescriptionConfig - transformations: ProgramTransformationsConfig - localization: LocalizationConfig + transformations: Optional[ProgramTransformationsConfig] + localization: Optional[LocalizationConfig] search: SearcherConfig - coverage: CoverageConfig + coverage: Optional[CoverageConfig] resource_limits: ResourceLimits seed: int = attr.ib(default=0) optimizations: OptimizationsConfig = attr.ib(factory=OptimizationsConfig) @@ -240,7 +240,7 @@ def err(m: str) -> NoReturn: # coverage config if 'coverage' in yml: if plus: - yml['coverage']['method']['type']='plus' + yml['coverage']['method']['type'] = 'plus' coverage = CoverageConfig.from_dict(yml['coverage'], dir_) else: m = "'coverage' section is expected" @@ -282,7 +282,7 @@ def err(m: str) -> NoReturn: @attr.s(frozen=True, auto_attribs=True) -class EvaluateConfig: +class EvaluateConfig(Config): """A configuration for Darjeeling to evaluate patches with additional content. Attributes @@ -299,29 +299,23 @@ class EvaluateConfig: Limits on the resources that may be consumed during the search. """ - search: SearcherConfig - program: ProgramDescriptionConfig - resource_limits: ResourceLimits - dir_patches: str = attr.ib() - threads: int = attr.ib(default=1) - - @dir_patches.validator - def validate_patches(self, attribute, value): - if not os.path.isabs(value): - m = "patch directory should be an absolute path." - raise BadConfigurationException(m) - - @threads.validator - def validate_threads(self, attribute, value): - if value < 1: - m = "number of threads must be greater than or equal to 1." - raise BadConfigurationException(m) + # search: SearcherConfig + # program: ProgramDescriptionConfig + # resource_limits: ResourceLimits + # dir_patches: str = attr.ib() + # threads: int = attr.ib(default=1) @staticmethod def from_yml(yml: Dict[str, Any], dir_: Optional[str] = None, *, + terminate_early: bool = True, + plus: bool = False, + seed: Optional[int] = None, threads: Optional[int] = None, + run_redundant_tests: bool = False, + limit_candidates: Optional[int] = None, + limit_time_minutes: Optional[int] = None, dir_patches: Optional[str] = None ) -> 'EvaluateConfig': """Loads a configuration from a YAML dictionary. @@ -360,18 +354,18 @@ def err(m: str) -> NoReturn: resource_limits = \ ResourceLimits.from_dict(yml['resource-limits'], dir_) - - if 'program' not in yml: err("'program' section is missing") program = ProgramDescriptionConfig.from_dict(dict_=yml['program'], dir_=dir_, heldout=True) - - search = SearcherConfig.from_dict({'type':'reviewer'}, dir_) - - return EvaluateConfig( - threads=threads, - program=program, - search=search, - dir_patches=dir_patches, - resource_limits=resource_limits) + search = SearcherConfig.from_dict({'type': 'reviewer'}, dir_) + + return EvaluateConfig(threads=threads, + program=program, + search=search, + dir_patches=dir_patches, + resource_limits=resource_limits, + transformations=None, + localization=None, + coverage=None + ) diff --git a/src/darjeeling/coverage/config.py b/src/darjeeling/coverage/config.py index 6bd89533..f9d271f6 100644 --- a/src/darjeeling/coverage/config.py +++ b/src/darjeeling/coverage/config.py @@ -112,7 +112,7 @@ def build(self, # exclude yacc and lex files def is_yacc_or_lex_file(filename: str) -> bool: return filename.endswith(".y") or filename.endswith(".l") - + covered_files = set(filename for test_coverage in coverage.values() for filename in test_coverage.lines.files) restrict_to_files = set(filename for filename in covered_files if not is_yacc_or_lex_file(filename)) coverage = coverage.restrict_to_files(restrict_to_files) diff --git a/src/darjeeling/coverage/gcov.py b/src/darjeeling/coverage/gcov.py index e124afcb..94dd98a9 100644 --- a/src/darjeeling/coverage/gcov.py +++ b/src/darjeeling/coverage/gcov.py @@ -34,7 +34,7 @@ " exit(sig);\n" "}\n" "void __attribute__ ((constructor)) darjeeling_ctor (void) {\n" - #"void darjeeling_ctor (void) {\n" + # "void darjeeling_ctor (void) {\n" " struct sigaction new_action;\n" " new_action.sa_handler = darjeeling_sighandler;\n" " sigemptyset(&new_action.sa_mask);\n" @@ -132,8 +132,6 @@ def _find_source_filenames(self, """Determines the set of all source files within a program.""" with program.provision() as container: source_directory = program.source_directory - build_directory = program.build_directory - src_subdirectory = program.src_subdirectory endings = ('.cpp', '.cc', '.c', '.h', '.hh', '.hpp', '.cxx') command = ' -o '.join([f"-name \*{e}" for e in endings]) command = f'find {source_directory} -type f \( {command} \)' @@ -228,7 +226,7 @@ def _resolve_filepath(self, filename_relative: str) -> str: def _resolve_filepath_pdr(self, base_filename: str) -> str: base = os.path.basename(base_filename) # may make sense to check for duplicate basenames, but TBD - source_lut = { os.path.basename(fn): fn for fn in self._source_filenames } + source_lut = {os.path.basename(fn): fn for fn in self._source_filenames} return os.path.relpath(source_lut.get(base, base), self._source_directory) def _parse_xml_report(self, root: ET.Element) -> FileLineSet: @@ -268,12 +266,12 @@ def _extract(self, container: 'ProgramContainer') -> FileLineSet: command = f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' if self._src_subdirectory and self._src_subdirectory != "": - command+=f" {self._src_subdirectory} " + command += f" {self._src_subdirectory} " logger.trace(f"executing gcovr command: {command}") - fpath=self._build_directory + fpath = self._build_directory logger.info(f"executing gcovr command: '{command}' in '{fpath}'") - #gcda=shell.check_output("find . -type f -name \"*.gcda\"", cwd=fpath) - #logger.info(f"GCDA: \n>>>{gcda}\n<<<") + # gcda=shell.check_output("find . -type f -name \"*.gcda\"", cwd=fpath) + # logger.info(f"GCDA: \n>>>{gcda}\n<<<") shell.check_call(command, cwd=fpath) xml_file_contents = files.read(temporary_filename) logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") diff --git a/src/darjeeling/evaluator.py b/src/darjeeling/evaluator.py index a859ad22..ca89bb9f 100644 --- a/src/darjeeling/evaluator.py +++ b/src/darjeeling/evaluator.py @@ -18,7 +18,7 @@ from loguru import logger from . import exceptions as exc -from .candidate import (Candidate, DiffCandidate) +from .candidate import Candidate, DiffCandidate from .container import ProgramContainer from .outcome import (BuildOutcome, CandidateOutcome, CandidateOutcomeStore, TestOutcome, TestOutcomeSet) @@ -36,7 +36,7 @@ if typing.TYPE_CHECKING: from .problem import Problem -Evaluation = Tuple[Union[ Candidate, DiffCandidate ], CandidateOutcome] +Evaluation = Tuple[Candidate, CandidateOutcome] class Evaluator(DarjeelingEventProducer): @@ -118,26 +118,30 @@ def _filter_redundant_tests(self, candidate: Candidate, tests: List[Test] ) -> Tuple[List[Test], Set[Test]]: - line_coverage_by_test = self.__problem.coverage - lines_changed = candidate.lines_changed() + if self.__problem.coverage: + line_coverage_by_test = self.__problem.coverage + lines_changed = candidate.lines_changed() - # if no lines are changed, retain all tests (fixes issue #128) - if not lines_changed: - return (tests, set()) + # if no lines are changed, retain all tests (fixes issue #128) + if not lines_changed: + return (tests, set()) - keep: List[Test] = [] - drop: Set[Test] = set() - for test in tests: - test_line_coverage = line_coverage_by_test[test.name] - if not any(line in test_line_coverage for line in lines_changed): - drop.add(test) - else: - keep.append(test) - return (keep, drop) + keep: List[Test] = [] + drop: Set[Test] = set() + for test in tests: + test_line_coverage = line_coverage_by_test[test.name] + if not any(line in test_line_coverage for line in lines_changed): + drop.add(test) + else: + keep.append(test) + return (keep, drop) + else: + logger.warning("Attempting to run coverage-based evaluation on incompatible configuration") + return (tests, set()) def _run_test(self, container: ProgramContainer, - candidate: Union[ Candidate, DiffCandidate ], + candidate: Candidate, test: Test ) -> TestOutcome: """Runs a test for a given patch using a provided container.""" @@ -161,7 +165,7 @@ def _run_test(self, outcome = TestOutcome(successful=False, time_taken=timer.duration) - id_=" heldout" if isinstance(candidate,DiffCandidate) else "" + id_ = " heldout" if isinstance(candidate, DiffCandidate) else "" if not outcome.successful: logger.debug(f"*{id_} test failed: {test.name} ({candidate})") @@ -170,7 +174,7 @@ def _run_test(self, self.dispatch(TestExecutionFinished(candidate, test, outcome)) return outcome - def _evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> CandidateOutcome: + def _evaluate(self, candidate: Candidate) -> CandidateOutcome: outcomes = self.__outcomes patch = candidate.to_diff() logger.info(f"evaluating candidate: {candidate}\n{patch}\n") @@ -269,7 +273,7 @@ def _evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> CandidateOu finally: logger.info(f"evaluated candidate: {candidate}") - def evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> Evaluation: + def evaluate(self, candidate: Candidate) -> Evaluation: """Evaluates a given candidate patch.""" outcomes = self.__outcomes self.dispatch(CandidateEvaluationStarted(candidate)) @@ -290,7 +294,7 @@ def evaluate(self, candidate: Union[ Candidate, DiffCandidate ]) -> Evaluation: self.__num_running -= 1 return (candidate, outcome) - def submit(self, candidate: Union[ Candidate, DiffCandidate ]) -> 'Future[Evaluation]': + def submit(self, candidate: Candidate) -> 'Future[Evaluation]': """Schedules a candidate patch evaluation.""" with self.__lock: self.__num_running += 1 diff --git a/src/darjeeling/events/event.py b/src/darjeeling/events/event.py index f66d0bcf..93b9dfa7 100644 --- a/src/darjeeling/events/event.py +++ b/src/darjeeling/events/event.py @@ -8,6 +8,7 @@ TestOutcome as _TestOutcome, BuildOutcome as _BuildOutcome) from ..candidate import Candidate as _Candidate + from ..outcome import CandidateOutcome as _CandidateOutcome diff --git a/src/darjeeling/localization.py b/src/darjeeling/localization.py index 991b7c87..6c9d252b 100644 --- a/src/darjeeling/localization.py +++ b/src/darjeeling/localization.py @@ -150,7 +150,7 @@ def from_config(coverage: TestCoverageMap, m = f"suspiciousness metric not supported: {cfg.metric}" raise BadConfigurationException(m) logger.info(f"using suspiciousness metric: {cfg.metric}") - #logger.info(f"coverage: {str(coverage)}") + # logger.info(f"coverage: {str(coverage)}") logger.debug(f"coverage: {str(coverage)}") loc = Localization.from_coverage(coverage, metric) diff --git a/src/darjeeling/problem.py b/src/darjeeling/problem.py index aea46f3b..25ed0793 100644 --- a/src/darjeeling/problem.py +++ b/src/darjeeling/problem.py @@ -53,25 +53,55 @@ class Problem: environment: 'Environment' config: 'Config' language: 'Language' - coverage: 'TestCoverageMap' + coverage: 'Optional[TestCoverageMap]' sources: ProgramSource program: 'ProgramDescription' failing_tests: Sequence[Test] passing_tests: Sequence[Test] test_ordering: Iterable[Test] analysis: Optional[Analysis] - localization: 'Localization' + localization: 'Optional[Localization]' @staticmethod def build(environment: 'Environment', config: 'Config', language: 'Language', - coverage: 'TestCoverageMap', + coverage: 'Optional[TestCoverageMap]', program: 'ProgramDescription', - localization: 'Localization', + localization: 'Optional[Localization]', *, analysis: Optional[Analysis] = None, + patch_files: set = set(), ) -> 'Problem': + if coverage and localization: + return Problem.build_default( + environment, + config, + language, + coverage, + program, + localization, + analysis=analysis + ) + else: + return Problem.build_evaluation( + environment, + config, + language, + program, + patch_files=patch_files + ) + + @staticmethod + def build_default(environment: 'Environment', + config: 'Config', + language: 'Language', + coverage: 'TestCoverageMap', + program: 'ProgramDescription', + localization: 'Localization', + *, + analysis: Optional[Analysis] = None + ) -> 'Problem': """Constructs a problem description. Raises @@ -148,26 +178,25 @@ def ordering(x: Test, y: Test) -> int: @staticmethod def build_evaluation(environment: 'Environment', - config: 'EvaluateConfig', - language: 'Language', - program: 'ProgramDescription', - *, - patch_files: set, - ) -> 'Problem': + config: 'Config', + language: 'Language', + program: 'ProgramDescription', + *, + patch_files: set, + ) -> 'Problem': """Constructs a Problem description based on Patch file for evaluation only. Raises ------- """ - passing_tests: Sequence[Test] = \ - tuple( sorted(program.tests) ) - + passing_tests: Sequence[Test] = tuple(program.tests) + failing_tests: Sequence[Test] = tuple() logger.info("ordering test cases") test_ordering: Sequence[Test] = \ - tuple(sorted(program.tests)) + tuple(program.tests) logger.info('test order: {}', ', '.join(t.name for t in test_ordering)) logger.debug("storing contents of source code files") @@ -177,18 +206,17 @@ def build_evaluation(environment: 'Environment', logger.debug("stored contents of source code files") solution = Problem(environment=environment, - program=program, - language=language, - sources=sources, - config=config, - passing_tests=passing_tests, - failing_tests=failing_tests, - test_ordering=test_ordering, - analysis=None, - coverage=None, - localization=None - ) - + program=program, + language=language, + sources=sources, + config=config, + passing_tests=passing_tests, + failing_tests=failing_tests, + test_ordering=test_ordering, + analysis=None, + localization=None, + coverage=None + ) return solution def validate(self) -> None: @@ -229,9 +257,10 @@ def lines(self) -> Iterator[FileLine]: Returns an iterator over the lines that are implicated by the description of this problem. """ - yield from self.coverage.failing.locations + if self.coverage: + yield from self.coverage.failing.locations @property def implicated_files(self) -> Iterator[str]: - yield from set(location.filename for location in self.coverage.failing.locations) - + if self.coverage: + yield from set(location.filename for location in self.coverage.failing.locations) diff --git a/src/darjeeling/program.py b/src/darjeeling/program.py index d1017267..5b3c1422 100644 --- a/src/darjeeling/program.py +++ b/src/darjeeling/program.py @@ -36,7 +36,7 @@ class ProgramDescriptionConfig: @staticmethod def from_dict(dict_: Mapping[str, Any], dir_: Optional[str] = None, - heldout : Optional[bool] = False + heldout: Optional[bool] = False ) -> 'ProgramDescriptionConfig': def err(message: str) -> NoReturn: raise exc.BadConfigurationException(message) @@ -82,7 +82,7 @@ def err(message: str) -> NoReturn: err(f"unsupported language [{dict_['language']}]. {supported}") # test suite - # populate with 'heldout-tests' content only when specified + # populate with 'heldout-tests' content only when specified # 'tests' is default behavior tests_key = 'tests' if not heldout else 'heldout-tests' diff --git a/src/darjeeling/resources.py b/src/darjeeling/resources.py index 151ab85b..1aa71867 100644 --- a/src/darjeeling/resources.py +++ b/src/darjeeling/resources.py @@ -37,11 +37,6 @@ def with_limits(limits: 'ResourceLimits') -> 'ResourceUsageTracker': """Constructs a new tracker with given resource limits.""" return ResourceUsageTracker(limits=limits) - @staticmethod - def no_limits() -> 'ResourceUsageTracker': - """Constructs a new tracker with given resource limits.""" - return ResourceUsageTracker(limits=None) - def check_limits(self) -> None: """Checks whether the resource limit has been reached, and if so, throws an exception. diff --git a/src/darjeeling/searcher/__init__.py b/src/darjeeling/searcher/__init__.py index df2f78f1..f1727644 100644 --- a/src/darjeeling/searcher/__init__.py +++ b/src/darjeeling/searcher/__init__.py @@ -3,4 +3,4 @@ from .base import Searcher from .exhaustive import ExhaustiveSearcher from .genetic import GeneticSearcher -from .reviewer import Reviewer \ No newline at end of file +from .reviewer import Reviewer diff --git a/src/darjeeling/searcher/config.py b/src/darjeeling/searcher/config.py index 19ae5178..7c9eb1a0 100644 --- a/src/darjeeling/searcher/config.py +++ b/src/darjeeling/searcher/config.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- __all__ = ('SearcherConfig',) -from typing import Dict, Optional, Any, Type, Iterator +from typing import Dict, Optional, Any, Type, Iterator, List import abc import typing @@ -12,6 +12,7 @@ from ..problem import Problem from ..resources import ResourceUsageTracker from ..transformation import ProgramTransformations + from ..candidate import DiffPatch @dynamically_registered(lookup='lookup') @@ -43,8 +44,9 @@ def from_dict(cls, def build(self, problem: 'Problem', resources: 'ResourceUsageTracker', - transformations: 'ProgramTransformations', *, + transformations: 'Optional[ProgramTransformations]' = None, + candidates: 'Optional[List[DiffPatch]]' = None, threads: int = 1, run_redundant_tests: bool = False ) -> 'Searcher': diff --git a/src/darjeeling/searcher/exhaustive.py b/src/darjeeling/searcher/exhaustive.py index 90060648..de2dac7b 100644 --- a/src/darjeeling/searcher/exhaustive.py +++ b/src/darjeeling/searcher/exhaustive.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- __all__ = ('ExhaustiveSearcher',) -from typing import Any, Dict, Iterable, Iterator, Optional +from typing import Any, Dict, Iterable, Iterator, Optional, List import typing from loguru import logger @@ -10,12 +10,12 @@ from .config import SearcherConfig from ..candidate import Candidate from ..resources import ResourceUsageTracker -from ..transformation import Transformation +from ..transformation import Transformation, ProgramTransformations from ..exceptions import SearchExhausted if typing.TYPE_CHECKING: from ..problem import Problem - from ..transformations import ProgramTransformations + from ..candidate import DiffPatch class ExhaustiveSearcherConfig(SearcherConfig): @@ -38,16 +38,20 @@ def from_dict(cls, def build(self, problem: 'Problem', resources: ResourceUsageTracker, - transformations: 'ProgramTransformations', + transformations: 'ProgramTransformations' = None, *, + candidates: 'List[DiffPatch]' = None, threads: int = 1, run_redundant_tests: bool = False ) -> Searcher: + if not transformations: + transformations = ProgramTransformations.build([], problem) return ExhaustiveSearcher(problem=problem, resources=resources, transformations=transformations, threads=threads, - run_redundant_tests=run_redundant_tests) + run_redundant_tests=run_redundant_tests + ) class ExhaustiveSearcher(Searcher): diff --git a/src/darjeeling/searcher/genetic.py b/src/darjeeling/searcher/genetic.py index d61293ef..f5ad3045 100644 --- a/src/darjeeling/searcher/genetic.py +++ b/src/darjeeling/searcher/genetic.py @@ -10,7 +10,8 @@ from .base import Searcher from .config import SearcherConfig -from ..candidate import Candidate +from ..candidate import Candidate, DiffPatch + from ..resources import ResourceUsageTracker from ..transformation import Transformation, ProgramTransformations from ..outcome import CandidateOutcome @@ -53,12 +54,15 @@ def from_dict(cls, def build(self, problem: 'Problem', - resources: ResourceUsageTracker, - transformations: ProgramTransformations, + resources: 'ResourceUsageTracker', + transformations: 'ProgramTransformations' = None, *, + candidates: 'Optional[List[DiffPatch]]' = None, threads: int = 1, - run_redundant_tests: bool = False + run_redundant_tests: bool = False, ) -> Searcher: + if not transformations: + transformations = ProgramTransformations.build([], problem) return GeneticSearcher(problem=problem, resources=resources, transformations=transformations, @@ -130,7 +134,7 @@ def tournament_size(self) -> int: def initial(self) -> Population: """Generates an initial population according to this strategy.""" - pop = [] + pop: Population = [] for _ in range(self.population_size): pop.append(Candidate(self.problem, [])) return self.mutate(pop) @@ -175,12 +179,14 @@ def select(self, def mutate(self, pop: Population) -> Population: problem = self.problem - offspring = [] + offspring: Population = [] for ind in pop: child = ind if random.random() <= self.rate_mutation: mutation = self.choose_transformation() - transformations = child.transformations + (mutation,) + transformations = None + if child.transformations: + transformations = child.transformations + (mutation,) child = Candidate(problem, transformations) # type: ignore offspring.append(child) return offspring @@ -190,8 +196,8 @@ def one_point_crossover(px: Candidate, py: Candidate ) -> List[Candidate]: problem = self.problem - tx = list(px.transformations) - ty = list(py.transformations) + tx = list(px.transformations) if px.transformations else list() + ty = list(py.transformations) if py.transformations else list() lx = random.randint(0, len(tx)) ly = random.randint(0, len(ty)) diff --git a/src/darjeeling/searcher/reviewer.py b/src/darjeeling/searcher/reviewer.py index f7f206df..c72c735b 100644 --- a/src/darjeeling/searcher/reviewer.py +++ b/src/darjeeling/searcher/reviewer.py @@ -9,11 +9,9 @@ from .base import Searcher from .config import SearcherConfig -from ..candidate import DiffCandidate +from ..candidate import Candidate, DiffCandidate, DiffPatch from ..resources import ResourceUsageTracker -from ..transformation import Transformation from ..exceptions import SearchExhausted -from bugzoo.core.patch import Patch if typing.TYPE_CHECKING: from ..problem import Problem @@ -39,22 +37,26 @@ def from_dict(cls, def build(self, problem: 'Problem', - resources: ResourceUsageTracker, - candidates: List[Patch], + resources: 'ResourceUsageTracker', + candidates: 'List[DiffPatch]' = None, *, - threads: int = 1 + transformations: 'Optional[ProgramTransformations]' = None, + threads: int = 1, + run_redundant_tests: bool = False ) -> Searcher: + if not candidates: + candidates = [] return Reviewer(problem=problem, - resources=resources, - candidates=candidates, - threads=threads) + resources=resources, + candidates=candidates, + threads=threads) class Reviewer(Searcher): def __init__(self, problem: 'Problem', resources: ResourceUsageTracker, - candidates: List[Patch], + candidates: List[DiffPatch], *, threads: int = 1 ) -> None: @@ -67,16 +69,16 @@ def __init__(self, @staticmethod def all_candidates(problem: 'Problem', - candidates: Iterable[DiffCandidate] - ) -> Iterator[DiffCandidate]: + candidates: Iterable[DiffPatch] + ) -> Iterator[Candidate]: logger.debug(f"Obtaining all patch candidates") for c in candidates: logger.trace(f"Processing {repr(c)}") print(f"Processing {repr(c)}") - yield DiffCandidate(problem,c) + yield DiffCandidate(problem, [], c) logger.debug(f"Obtained all patch candidates") - def _generate(self) -> DiffCandidate: + def _generate(self) -> Candidate: try: logger.debug('generating candidate patch...') candidate = next(self.__candidates) @@ -86,7 +88,7 @@ def _generate(self) -> DiffCandidate: logger.debug('exhausted all candidate patches') raise SearchExhausted - def run(self) -> Iterator[DiffCandidate]: + def run(self) -> Iterator[Candidate]: for _ in range(self.num_workers): candidate = self._generate() self.evaluate(candidate) diff --git a/src/darjeeling/session.py b/src/darjeeling/session.py index 70c0c361..391624a9 100644 --- a/src/darjeeling/session.py +++ b/src/darjeeling/session.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -__all__ = ('Session','EvaluateSession',) +__all__ = ('Session', 'EvaluateSession',) -from typing import Iterator, List +from typing import Iterator, List, Optional, Set import glob import os import random @@ -85,18 +85,18 @@ def from_config(environment: Environment, cfg: Config) -> 'Session': # compute coverage logger.info("computing coverage information...") - coverage = cfg.coverage.build(environment, program) + coverage = cfg.coverage.build(environment, program) if cfg.coverage else None logger.info("computed coverage information") logger.debug(f"coverage: {coverage}") # compute localization logger.info("computing fault localization...") localization = \ - Localization.from_config(coverage, cfg.localization) + Localization.from_config(coverage, cfg.localization) if coverage and cfg.localization else None logger.info(f"computed fault localization:\n{localization}") # determine implicated files - files = localization.files + files = localization.files if localization else None if program.language in (Language.CPP, Language.C): kaskara_project = kaskara.Project(dockerblade=environment.dockerblade, @@ -132,7 +132,7 @@ def from_config(environment: Environment, cfg: Config) -> 'Session': snippets = LineSnippetDatabase.for_problem(problem) logger.info(f"constructed database of donor snippets: {len(snippets)} snippets") - transformations = cfg.transformations.build(problem, snippets) + transformations = cfg.transformations.build(problem, snippets) if cfg.transformations else None searcher = cfg.search.build(problem, resources=resources, transformations=transformations, @@ -158,9 +158,12 @@ def problem(self) -> Problem: return self.searcher.problem @property - def coverage(self) -> TestCoverageMap: + def coverage(self) -> Optional[TestCoverageMap]: """The test suite coverage for the program under repair.""" - return self.problem.coverage + if self.problem.coverage: + return self.problem.coverage + else: + return None def attach_handler(self, handler: DarjeelingEventHandler) -> None: super().attach_handler(handler) @@ -239,7 +242,7 @@ class EvaluateSession(DarjeelingEventProducer): searcher: Searcher = attr.ib() resources: ResourceUsageTracker = attr.ib() candidates: List[DiffPatch] = attr.ib(factory=list) - _general_patches: List[DiffPatch] = attr.ib(factory=list) + _general_patches: List[Candidate] = attr.ib(factory=list) def __attrs_post_init__(self) -> None: DarjeelingEventProducer.__init__(self) @@ -253,22 +256,27 @@ def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': if not os.path.exists(dir_patches): print(f"Patch directory does not exist: {dir_patches}") raise RuntimeError - + logger.warning("checking existing patch directory") - candidates:List[DiffPatch] = [] + candidates: List[DiffPatch] = [] + + logger.warning("clearing existing patch directory of previously identified general-patches") + for fn in glob.glob(f'{dir_patches}/general-*.diff'): + if os.path.isfile(fn): + os.remove(fn) for fn in glob.glob(f'{dir_patches}/*.diff'): if os.path.isfile(fn): logger.debug(f"Reading in {fn}") diff = open(fn, 'r').read() - fn_name=os.path.basename(fn) - candidates.append(DiffPatch(file=fn_name,patch=Patch.from_unidiff(diff))) + fn_name = os.path.basename(fn) + candidates.append(DiffPatch(file=fn_name, patch=Patch.from_unidiff(diff))) - patched_files = set() + patched_files: Set[str] = set() for p in candidates: patched_files.add(*p.files) logger.debug(f"These files were patched: {patched_files}") - if len(patched_files)==0: + if len(patched_files) == 0: print(f"Patch directory was effectively empty.") raise RuntimeError logger.debug('obtained content from patch directory') @@ -284,11 +292,11 @@ def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': # build problem for solution evaluations problem = Problem.build_evaluation(environment=environment, - config=cfg, - language=program.language, - program=program, - patch_files=patched_files - ) + config=cfg, + language=program.language, + program=program, + patch_files=patched_files + ) logger.debug(f"built program: {program}") searcher = cfg.search.build(problem, @@ -297,19 +305,18 @@ def from_config(environment: Environment, cfg: Config) -> 'EvaluateSession': threads=cfg.threads) # build basic structure to evaluate solutions evaluation = Problem.build_evaluation(environment=environment, - config=cfg, - language=program.language, - program=program, - patch_files=patched_files) + config=cfg, + language=program.language, + program=program, + patch_files=patched_files) # build session - return EvaluateSession( - problem=evaluation, - searcher=searcher, - resources=resources, - candidates=candidates, - dir_patches=dir_patches - ) + return EvaluateSession(problem=evaluation, + searcher=searcher, + resources=resources, + candidates=candidates, + dir_patches=dir_patches + ) @property def snapshot(self) -> Snapshot: @@ -381,4 +388,4 @@ def _save_patches_to_disk(self) -> None: def __enter__(self) -> 'EvaluateSession': self.run() - return self \ No newline at end of file + return self diff --git a/src/darjeeling/test/shell.py b/src/darjeeling/test/shell.py index 65bc3fe7..dc9a5071 100644 --- a/src/darjeeling/test/shell.py +++ b/src/darjeeling/test/shell.py @@ -111,10 +111,10 @@ def execute( cwd=self._workdir, time_limit=self._time_limit_seconds, environment=environment, - # 12/7 note from pdr - not all runs seem to return str compatible + # 12/7 note from pdr - not all runs seem to return str compatible # with default decoding in dockerblade # outcome.output does not look like it's used at all - #text=True, + # text=True, text=False, ) logger.trace(f"shell test outcome: {outcome}") diff --git a/src/darjeeling/transformation/base.py b/src/darjeeling/transformation/base.py index f608264c..772f5efa 100644 --- a/src/darjeeling/transformation/base.py +++ b/src/darjeeling/transformation/base.py @@ -91,5 +91,6 @@ def find_all_at_lines(self, def find_all(self, problem: 'Problem') -> Iterator[Transformation]: """Finds all transformations using this schema for a given problem.""" - implicated_lines = list(problem.localization) - yield from self.find_all_at_lines(implicated_lines) + if problem.localization: + implicated_lines = list(problem.localization) + yield from self.find_all_at_lines(implicated_lines) From 3d40fc0aed9a7d8cfcf5cdc5bc2145512cdf87db Mon Sep 17 00:00:00 2001 From: pdreiter Date: Fri, 7 Apr 2023 21:56:07 -0700 Subject: [PATCH 4/5] cleaned up tox errors/warnings in coverage/plus.py --- src/darjeeling/coverage/plus.py | 268 ++++++++++++++++---------------- 1 file changed, 134 insertions(+), 134 deletions(-) diff --git a/src/darjeeling/coverage/plus.py b/src/darjeeling/coverage/plus.py index d6d8c301..17d7feda 100644 --- a/src/darjeeling/coverage/plus.py +++ b/src/darjeeling/coverage/plus.py @@ -3,21 +3,21 @@ import os import typing as t -import xml.etree.ElementTree as ET +# import xml.etree.ElementTree as ET from loguru import logger import attr from .collector import CoverageCollector, CoverageCollectorConfig from ..core import FileLineSet -from ..source import ProgramSourceFile +# from ..source import ProgramSourceFile if t.TYPE_CHECKING: from ..container import ProgramContainer from ..environment import Environment from ..program import ProgramDescription -#_INSTRUMENTATION = ( +# _INSTRUMENTATION = ( # "/* DARJEELING :: INSTRUMENTATION :: START */\n" # "#include \n" # "#include \n" @@ -51,9 +51,9 @@ # " signal(SIGUSR2, darjeeling_sighandler);\n" # "}\n" # "/* DARJEELING :: INSTRUMENTATION :: END */\n" -#) -#_NUM_INSTRUMENTATION_LINES = _INSTRUMENTATION.count('\n') -#_LINES_TO_REMOVE = set(range(1, _NUM_INSTRUMENTATION_LINES)) +# ) +# _NUM_INSTRUMENTATION_LINES = _INSTRUMENTATION.count('\n') +# _LINES_TO_REMOVE = set(range(1, _NUM_INSTRUMENTATION_LINES)) # # @attr.s(auto_attribs=True, slots=True) @@ -117,8 +117,8 @@ def _find_source_filenames(self, """Determines the set of all source files within a program.""" with program.provision() as container: source_directory = program.source_directory - build_directory = program.build_directory - src_subdirectory = program.src_subdirectory + # build_directory = program.build_directory + # src_subdirectory = program.src_subdirectory endings = ('.cpp', '.cc', '.c', '.h', '.hh', '.hpp', '.cxx') command = ' -o '.join([f"-name \*{e}" for e in endings]) command = f'find {source_directory} -type f \( {command} \)' @@ -160,172 +160,172 @@ class PlusCollector(CoverageCollector): _source_filenames: t.FrozenSet[str] _environment: 'Environment' = attr.ib(repr=False) - #def _read_line_coverage_for_class(self, xml_class: ET.Element) -> t.Set[int]: - # xml_lines = xml_class.find('lines') - # assert xml_lines - # lines = xml_lines.findall('line') - # return set(int(line.attrib['number']) for line in lines - # if int(line.attrib['hits']) > 0) + # def _read_line_coverage_for_class(self, xml_class: ET.Element) -> t.Set[int]: + # xml_lines = xml_class.find('lines') + # assert xml_lines + # lines = xml_lines.findall('line') + # return set(int(line.attrib['number']) for line in lines + # if int(line.attrib['hits']) > 0) - #def _corrected_lines(self, - # relative_filename: str, - # lines: t.Set[int] - # ) -> t.Set[int]: - # if os.path.isabs(relative_filename): - # absolute_filename = relative_filename - # else: - # absolute_filename = os.path.join(self._source_directory, relative_filename) + # def _corrected_lines(self, + # relative_filename: str, + # lines: t.Set[int] + # ) -> t.Set[int]: + # if os.path.isabs(relative_filename): + # absolute_filename = relative_filename + # else: + # absolute_filename = os.path.join(self._source_directory, relative_filename) # - # instrumented_filenames = set(f.filename for f in self._files_to_instrument) - # if absolute_filename not in instrumented_filenames: - # logger.trace(f"file was not instrumented: {absolute_filename}") - # return lines + # instrumented_filenames = set(f.filename for f in self._files_to_instrument) + # if absolute_filename not in instrumented_filenames: + # logger.trace(f"file was not instrumented: {absolute_filename}") + # return lines # - # lines = lines - _LINES_TO_REMOVE - # return set(i - _NUM_INSTRUMENTATION_LINES for i in lines) + # lines = lines - _LINES_TO_REMOVE + # return set(i - _NUM_INSTRUMENTATION_LINES for i in lines) + + # def _has_source_file(self, filename_relative: str) -> bool: + # source_directory = self._source_directory + # filename_absolute = os.path.join(source_directory, filename_relative) + # return filename_absolute in self._source_filenames - #def _has_source_file(self, filename_relative: str) -> bool: - # source_directory = self._source_directory - # filename_absolute = os.path.join(source_directory, filename_relative) - # return filename_absolute in self._source_filenames - def get_relative_filename(self, filename_absolute: str) -> str: from re import sub - relative_filename = sub(self._source_directory+"/", "", filename_absolute) + relative_filename = sub(self._source_directory + "/", "", filename_absolute) return relative_filename def _has_source_file(self, filename_absolute: str) -> bool: return filename_absolute in self._source_filenames - ## FIXME is this a general solution? nope, not a general solution - #def _resolve_filepath(self, filename_relative: str) -> str: - # if not filename_relative: - # raise ValueError('failed to resolve path') - # if self._has_source_file(filename_relative): - # return filename_relative - # - # filename_relative_child = '/'.join(filename_relative.split('/')[1:]) - # return self._resolve_filepath(filename_relative_child) + # # FIXME is this a general solution? nope, not a general solution + # def _resolve_filepath(self, filename_relative: str) -> str: + # if not filename_relative: + # raise ValueError('failed to resolve path') + # if self._has_source_file(filename_relative): + # return filename_relative + # + # filename_relative_child = '/'.join(filename_relative.split('/')[1:]) + # return self._resolve_filepath(filename_relative_child) - #def _resolve_filepath_pdr(self, base_filename: str) -> str: - # src_file=os.path.join(self._src_subdirectory,base_filename) - # return self._resolve_filepath(src_file) + # def _resolve_filepath_pdr(self, base_filename: str) -> str: + # src_file=os.path.join(self._src_subdirectory,base_filename) + # return self._resolve_filepath(src_file) - #def _parse_xml_report(self, root: ET.Element) -> FileLineSet: - # packages_node = root.find('packages') - # assert packages_node - # package_nodes = packages_node.findall('package') - # class_nodes = [c for p in package_nodes for c in p.find('classes').findall('class')] # type: ignore + # def _parse_xml_report(self, root: ET.Element) -> FileLineSet: + # packages_node = root.find('packages') + # assert packages_node + # package_nodes = packages_node.findall('package') + # class_nodes = [c for p in package_nodes for c in p.find('classes').findall('class')] # type: ignore # - # filename_to_lines: t.Dict[str, t.Set[int]] = {} - # for node in class_nodes: - # filename = node.attrib['filename'] - # try: - # filename_original = filename - # filename = self._resolve_filepath_pdr(filename) - # logger.trace(f"resolving path '{filename_original}' " - # f"-> '{filename}'") - # except ValueError: - # logger.warning(f'failed to resolve file: {filename}') - # continue + # filename_to_lines: t.Dict[str, t.Set[int]] = {} + # for node in class_nodes: + # filename = node.attrib['filename'] + # try: + # filename_original = filename + # filename = self._resolve_filepath_pdr(filename) + # logger.trace(f"resolving path '{filename_original}' " + # f"-> '{filename}'") + # except ValueError: + # logger.warning(f'failed to resolve file: {filename}') + # continue # - # lines = self._read_line_coverage_for_class(node) - # lines = self._corrected_lines(filename, lines) - # if lines: - # filename_to_lines[filename] = lines + # lines = self._read_line_coverage_for_class(node) + # lines = self._corrected_lines(filename, lines) + # if lines: + # filename_to_lines[filename] = lines # - # return FileLineSet(filename_to_lines) + # return FileLineSet(filename_to_lines) - def obtain_faults(self,fplus:dict) -> FileLineSet: + def obtain_faults(self, fplus: dict) -> FileLineSet: ftl: t.Dict[str, t.Set[int]] = {} logger.info(f"self._source_directory: {self._source_directory}") if fplus: - for idx in ['addsans','ubsans']: - floc=fplus.get(idx,None) + for idx in ['addsans', 'ubsans']: + floc = fplus.get(idx, None) if floc: - for f in floc: - traces=f.get('trace',None) - if traces: - for trace in traces: - if trace: - fname=trace[0] - fdir =trace[1] - fline=trace[2] - fcol =trace[3] - ffun =trace[4] - if fname != "" and fdir != "": - try: - fpath = os.path.join(fdir, fname) - absolute_filename = os.path.abspath(fpath) - rel_file=self.get_relative_filename(absolute_filename) - x=ftl.get(rel_file,None) - if not x: - ftl[rel_file]=set() - ftl[rel_file].add(fline) - logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") - except Exception as e: - raise(e) - else: - loc=f.get('loc',None) - if loc: - fname=loc[0] - fdir =loc[1] if isinstance(loc[1],str) else loc[1][0] - fline=loc[2] - fcol =loc[3] - ffun =loc[4] - if fname != "" and fdir != "": - try: - fpath = os.path.join(fdir, fname) - absolute_filename = os.path.abspath(fpath) - rel_file=self.get_relative_filename(absolute_filename) - x=ftl.get(rel_file,None) - if not x: - ftl[rel_file]=set() - ftl[rel_file].add(fline) - logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") - except Exception as e: - raise(e) + for f in floc: + traces = f.get('trace', None) + if traces: + for trace in traces: + if trace: + fname = trace[0] + fdir = trace[1] + fline = trace[2] + fcol = trace[3] + ffun = trace[4] + if fname != "" and fdir != "": + try: + fpath = os.path.join(fdir, fname) + absolute_filename = os.path.abspath(fpath) + rel_file = self.get_relative_filename(absolute_filename) + x = ftl.get(rel_file, None) + if not x: + ftl[rel_file] = set() + ftl[rel_file].add(fline) + logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") + except Exception as e: + raise(e) + else: + loc = f.get('loc', None) + if loc: + fname = loc[0] + fdir = loc[1] if isinstance(loc[1], str) else loc[1][0] + fline = loc[2] + fcol = loc[3] + ffun = loc[4] + if fname != "" and fdir != "": + try: + fpath = os.path.join(fdir, fname) + absolute_filename = os.path.abspath(fpath) + rel_file = self.get_relative_filename(absolute_filename) + x = ftl.get(rel_file, None) + if not x: + ftl[rel_file] = set() + ftl[rel_file].add(fline) + logger.info(f"filename: {absolute_filename} => {rel_file} => {fline}") + except Exception as e: + raise(e) return FileLineSet(ftl) - #def _parse_xml_file_contents(self, contents: str) -> FileLineSet: + # def _parse_xml_file_contents(self, contents: str) -> FileLineSet: # logger.trace(f"Parsing gcovr report:\n{contents}") # root = ET.fromstring(contents) # return self._parse_xml_report(root) def _extract(self, container: 'ProgramContainer') -> FileLineSet: files = container.filesystem - fplus="/benchmarks/SanitizerResults/bothSan.json" - fplusd=None + fplus = "/benchmarks/SanitizerResults/bothSan.json" + fplusd = None try: - json_in=files.read(fplus) + json_in = files.read(fplus) import json - fplusd=json.loads(json_in) + fplusd = json.loads(json_in) except Exception as e: logger.error(f'Darjeeling Plus localization issue\nException:\n{e}') raise(e) return self.obtain_faults(fplusd) - #shell = container.shell - #temporary_filename = files.mktemp() + # shell = container.shell + # temporary_filename = files.mktemp() + # + # command = f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' + # if self._src_subdirectory and self._src_subdirectory != "": + # command+=f" {self._src_subdirectory} " + # logger.trace(f"executing gcovr command: {command}") + # # fpath=os.path.join(self._build_directory,self._src_subdirectory) + # fpath=self._build_directory + # logger.info(f"executing gcovr command: '{command}' in '{fpath}'") + # shell.check_call(command, cwd=fpath) + # xml_file_contents = files.read(temporary_filename) + # logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") # - #command = f'gcovr -o "{temporary_filename}" -x --root {self._source_directory} ' - #if self._src_subdirectory and self._src_subdirectory != "": - # command+=f" {self._src_subdirectory} " - #logger.trace(f"executing gcovr command: {command}") - ##fpath=os.path.join(self._build_directory,self._src_subdirectory) - #fpath=self._build_directory - #logger.info(f"executing gcovr command: '{command}' in '{fpath}'") - #shell.check_call(command, cwd=fpath) - #xml_file_contents = files.read(temporary_filename) - #logger.info(f"XML Contents: \n>>>>\n{xml_file_contents}\n<<<<") - # - #return self._parse_xml_file_contents(xml_file_contents) + # return self._parse_xml_file_contents(xml_file_contents) - #def _instrument( + # def _instrument( # self, # filename: str, # contents: str, # inject_at_line: int, - #) -> str: + # ) -> str: # file_ = ProgramSourceFile(filename, contents) # inject_at_location = file_.line_to_location_range(inject_at_line).start # inject_at_offset = file_.location_to_offset(inject_at_location) @@ -336,7 +336,7 @@ def _prepare(self, container: 'ProgramContainer') -> None: """ """ pass - #def _prepare(self, container: 'ProgramContainer') -> None: + # def _prepare(self, container: 'ProgramContainer') -> None: # """ # Adds source code instrumentation and recompiles the program inside # a container using the appropriate GCC options. Also ensures that From 0ab27da1c0264f086fccdced9e5b71ca8be11f10 Mon Sep 17 00:00:00 2001 From: ChrisTimperley Date: Sun, 11 Jun 2023 03:39:46 -0400 Subject: [PATCH 5/5] fixed environment setup --- src/darjeeling/cli/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/darjeeling/cli/__init__.py b/src/darjeeling/cli/__init__.py index 7d0fff02..e0042a64 100644 --- a/src/darjeeling/cli/__init__.py +++ b/src/darjeeling/cli/__init__.py @@ -360,8 +360,7 @@ def evaluate(self) -> None: dir_patches=dir_patches, threads=threads) - with bugzoo.server.ephemeral(timeout_connection=120) as client_bugzoo: - environment = Environment(bugzoo=client_bugzoo) + with Environment() as environment: try: session = EvaluateSession.from_config(environment, cfg) except BadConfigurationException: