from repo_cloner.lib import gen_repo_hashed_name
from repo_cloner.lib import DirNotFoundError
from repo_cloner.lib import ClonerConfig, DiskStoredList, RepoDirStructure, RepoTool, DetectedCommit, Detector
from pathlib import Path
from typing import Optional, Callable
from time import time
import os
import logging

log = logging.getLogger("rc.cloner")


class Cloner:
    _dirs: RepoDirStructure = None
    _config: ClonerConfig = None
    _interval_file: str = "last-check-time"
    __detector_cfg = "detector.cfg"
    __submodule_cache: str = None
    _repo: RepoTool = None
    _repo_url: str = ""

    def __init__(self, dir_structure: RepoDirStructure):
        self._dirs = dir_structure
        self._config = self._dirs.config
        if len(self._config.cloner_repo_url) == 0:
            log.critical(f"Undefined repo cloner URL in config!")
            raise KeyError(f"cloner_repo_url not defined in config!")

        # create cache dir, if missing
        # (accessing cache_dir_exists is expected to raise DirNotFoundError when the dir is missing)
        try:
            assert self._dirs.cache_dir_exists
        except DirNotFoundError:
            log.info(f"Cache dir for project {self._config.cloner_project_name} not found -> creating")
            Path(self._dirs.cache_dir).mkdir()
            log.debug(f"Cache dir created")

        # submodule cache
        self.__submodule_cache = os.path.join(self._dirs.cache_dir, "submodules")
        if not os.path.exists(self.__submodule_cache):
            log.info("Submodule cache dir does not exist! -> creating")
            Path(self.__submodule_cache).mkdir(parents = True)

    def check_interval(self) -> bool:
        log.debug(f"Checking interval for {self._config.cloner_project_name}")
        # configured check interval (minutes)
        interval = self._config.cloner_interval

        # timestamp of the last check, persisted in the interval file
        interval_file: Path = Path(self._dirs.cache_dir).joinpath(self._interval_file)
        log.debug(f"Interval file: {interval_file}")
        file_stamp: int = 0
        if interval_file.exists():
            str_val = interval_file.read_text()
            try:
                file_stamp = int(str_val)
            except ValueError:
                log.warning(f"Interval file is corrupted, keeping value as if nothing happened")

        # has the interval elapsed?
        return time() > file_stamp + interval * 60

    def open(self, url: str) -> bool:
        log.debug(f"Opening repo with url: {url}")
        repo_path = self._repo_path_by_url(url)
        self._repo_url = url
        self._repo = RepoTool(repo_path)
        return self.__opened

    @property
    def __opened(self) -> bool:
        if not self._repo:
            return False
        return self._repo.initialized

    def _repo_path_by_url(self, url: str) -> str:
        hashed_name: str = gen_repo_hashed_name(url)
        log.debug(f"Repo hashed name for {url} is {hashed_name}")
        return os.path.join(self._dirs.repos_dir, hashed_name)

    @property
    def __main_repo_path(self) -> str:
        return self._repo_path_by_url(self._config.cloner_repo_url)

    @classmethod
    def check_submodules_repo(
            cls,
            repo_tool: RepoTool,
            cache_file: str,
            submodule_list: DiskStoredList,
            scan_depth: Optional[int]
    ) -> DiskStoredList:
        base = os.path.basename(repo_tool.path)
        log.debug(f"Loading submodule cache for repo {base}")
        repo_commits = DiskStoredList(cache_file)
        log.debug(f"Loaded {len(repo_commits)} commits")

        # list commits of the fetched repo
        log.debug(f"Scanning repo {base} for new submodules")
        new_commits = repo_tool.list_commits(scan_depth)

        # discover new submodules in new commits
        for commit in new_commits:
            log.debug(f"Discovering submodules in {commit.hexsha}")
            if commit.hexsha in repo_commits:
                log.debug(f"Cached commit... Okay")
Okay") continue discovered = repo_tool.list_submodules(commit) if discovered: log.debug(f"Commit refers to {len(discovered)} submodules") for submodule in discovered: if submodule not in submodule_list: log.warning(f"Found new submodule: {submodule}") submodule_list.append(submodule) repo_commits.append(commit.hexsha) return submodule_list def sync(self) -> bool: if not self.__opened: self._repo = RepoTool(self.__main_repo_path) if not self._repo.initialized: return False # determine recursive behavior if not self._config.cloner_submodules: return self._repo.fetch() fingerprint = self._repo.repo_fingerprint # recursive now if not self._repo.fetch(): log.critical(f"Repo fetch failed for {self._config.cloner_project_name}") return False if fingerprint == self._repo.repo_fingerprint: log.info(f"Repo fingerpring unchanged - submodule discovery skipped") return True log.debug(f"Loading submodules.cache") submodules = DiskStoredList(os.path.join(self.__submodule_cache, "submodules.cache")) log.debug(f"Loaded submodules.cache - {len(submodules)} items") path = gen_repo_hashed_name(self._config.cloner_repo_url) log.debug(f"Main repo hashed name to load: {path}") # recursion limit? scan_depth = self._config.cloner_submodule_depth log.debug(f"Scan depth is {scan_depth} commits") if scan_depth == 0: log.debug(f"Repository scan depth is not limited! -> setting scan_depth to none") scan_depth = None submodules = Cloner.check_submodules_repo( self._repo, os.path.join(self.__submodule_cache, path), submodules, scan_depth) everything_succeed: bool = True everything_checked: bool = False fetched_repos = set() while not everything_checked: # recursively scan and clone repositories everything_checked = True # for every url in list # list() is needed - Runtime Error for set() changed during iteration for url in list(submodules): if url not in fetched_repos: everything_checked = False # generate new path directory = os.path.dirname(self.__main_repo_path) submodule_cloner = RepoTool(os.path.join(directory, gen_repo_hashed_name(url))) # clone or checkout? if not submodule_cloner.initialized: log.info(f"New uninitialized submodule found: {url}. Cloning...") checked: bool = submodule_cloner.clone(url) else: checked: bool = submodule_cloner.fetch() # mark cloned even if failed afterwards - while loop stuck solution fetched_repos.add(url) if not checked: log.critical(f"Clone/fetch of submodule: {url} failed") everything_succeed = False continue submodules = Cloner.check_submodules_repo( submodule_cloner, os.path.join(self.__submodule_cache, gen_repo_hashed_name(url)), submodules, scan_depth) return everything_succeed def perform_check(self): log.info(f"Started check for {self._config.cloner_project_name}, url: {self._config.cloner_repo_url}") if self.check_interval(): self.sync() log.info(f"Check finished") def clone(self, url: Optional[str] = None) -> bool: # optional parameters - othervise use config if not url: url = self._config.cloner_repo_url # generate path path = self._repo_path_by_url(url) self._repo_url = url self._repo = RepoTool(path) # uninitialized repo if self._repo.initialized: log.critical(f"Repo path {path} is initialized... Refusing clone!") return False # recursive or standard? 
        if not self._config.cloner_submodules:
            return self._repo.clone(url)
        else:
            scan_depth_limit = self._config.cloner_submodule_depth
            # handle depth limit for submodule discovery (0 means unlimited)
            if scan_depth_limit == 0:
                scan_depth_limit = None
            # further levels are handled internally by repo-tool as non-recursive clones and discoveries
            return self._repo.clone_recursive(url, self.__submodule_cache, scan_depth = scan_depth_limit)

    @property
    def detector_enabled(self) -> bool:
        log.debug(f"Querying detector config file")
        return os.path.exists(os.path.join(self._dirs.conf_dir, self.__detector_cfg))

    def detector_run(self, callback: Callable[[DetectedCommit], None]):
        detector = Detector(
            Path(self.__main_repo_path), Path(self._dirs.cache_dir), self._config.cloner_project_name)
        if detector.check_fingerprint():
            log.debug(f"Starting detector discovery")
            detector.run(callback)
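
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): a minimal
# way to drive Cloner, assuming a RepoDirStructure can be built from a project
# root directory and that its config defines cloner_repo_url, cloner_interval,
# etc. The RepoDirStructure constructor call below is an assumption made for
# illustration, not a documented signature.
if __name__ == "__main__":
    import sys

    logging.basicConfig(level = logging.DEBUG)

    project_root = sys.argv[1] if len(sys.argv) > 1 else "."
    dirs = RepoDirStructure(project_root)   # hypothetical constructor usage
    cloner = Cloner(dirs)

    if not cloner.open(dirs.config.cloner_repo_url):
        # repository not cloned yet -> initial (optionally recursive) clone
        cloner.clone()
    else:
        # repository already present -> interval-gated fetch and submodule discovery
        cloner.perform_check()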