repo-cloner/repo_cloner/lib/cloner.py

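"""Cloner - high-level repository mirroring logic for repo-cloner.

Wraps RepoTool to clone/fetch a configured repository, recursively discover
and mirror its submodules, and drive commit detection.
"""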
from repo_cloner.lib import gen_repo_hashed_name
from repo_cloner.lib import DirNotFoundError
from repo_cloner.lib import ClonerConfig, DiskStoredList, RepoDirStructure, RepoTool, DetectedCommit, Detector
from pathlib import Path
from typing import Optional, Callable
from time import time
import os
import logging
log = logging.getLogger("rc.cloner")


class Cloner:
_dirs: RepoDirStructure = None
_config: ClonerConfig = None
_interval_file: str = "last-check-time"
__detector_cfg = "detector.cfg"
__submodule_cache: str = None
_repo: RepoTool = None
_repo_url: str = ""

    def __init__(self, dir_structure: RepoDirStructure):
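        """Bind the project directory structure and config, creating cache dirs if missing."""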
self._dirs = dir_structure
self._config = self._dirs.config
        if len(self._config.cloner_repo_url) == 0:
            log.critical("cloner_repo_url is not defined in config!")
            raise KeyError("cloner_repo_url not defined in config!")
# create cache dir, if missing
try:
assert self._dirs.cache_dir_exists
except DirNotFoundError:
log.info(f"Cache dir for project {self._config.cloner_project_name} not found -> creating")
Path(self._dirs.cache_dir).mkdir()
log.debug(f"Cache dir created")
# submodule cache
self.__submodule_cache = os.path.join(self._dirs.cache_dir, "submodules")
if not os.path.exists(self.__submodule_cache):
log.info("Submodule cache dir does not exist! -> creating")
Path(self.__submodule_cache).mkdir(parents = True)

    def check_interval(self) -> bool:
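        """Return True when at least cloner_interval minutes have elapsed since the
        last recorded check (with a 10 s grace period for delayed jobs)."""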
log.debug(f"Checking interval for {self._config.cloner_project_name}")
# get interval
interval = self._config.cloner_interval
# interval file?
interval_file: Path = Path(self._dirs.cache_dir).joinpath(self._interval_file)
log.debug(f"Interval file: {interval_file}")
file_stamp: int = 0
if interval_file.exists():
str_val = interval_file.read_text()
try:
file_stamp = int(str_val)
except ValueError:
log.warning(f"Interval file file is corrupted, keeping value as nothing happened")
# check time - 10 second grace period for delayed jobs
if (time() + 10) > file_stamp + interval * 60:
return True
return False

    def open(self, url: str) -> bool:
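        """Point this cloner at the local mirror for url; return True if it is an initialized repo."""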
log.debug(f"Opening repo with url: {url}")
repo_path = self._repo_path_by_url(url)
self._repo_url = url
self._repo = RepoTool(repo_path)
return self.__opened

    @property
def __opened(self) -> bool:
if not self._repo:
return False
return self._repo.initialized

    def _repo_path_by_url(self, url: str) -> str:
hashed_name: str = gen_repo_hashed_name(url)
log.debug(f"Repo hashed name for {url} is {hashed_name}")
return os.path.join(self._dirs.repos_dir, hashed_name)

    @property
def main_repo_path(self) -> str:
return self._repo_path_by_url(self._config.cloner_repo_url)

    @classmethod
def check_submodules_repo(cls, repo_tool: RepoTool, cache_file: str, submodule_list: DiskStoredList,
scan_depth: Optional[int]):
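        """Scan commits of repo_tool (up to scan_depth) and append any submodule URLs
        not yet known to submodule_list.

        Already-scanned commit hashes are persisted in cache_file, so repeated runs
        only inspect commits that are new since the last scan.
        """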
base = os.path.basename(repo_tool.path)
log.debug(f"Loading submodule cache for repo {base}")
repo_commits = DiskStoredList(cache_file)
log.debug(f"Loaded {len(repo_commits)} commits")
# list fetched repo
log.debug(f"Scanning repo {base} for new submodules")
new_commits = repo_tool.list_commits(scan_depth)
# discover new submodules in new commits
for commit in new_commits:
log.debug(f"Discovering submodules in {commit.hexsha}")
if commit.hexsha in repo_commits:
log.debug(f"Cached commit... Okay")
continue
discovered = repo_tool.list_submodules(commit)
if discovered:
log.debug(f"Commit refers to {len(discovered)} submodules")
for submodule in discovered:
if submodule not in submodule_list:
log.warning(f"Found new submodule: {submodule}")
submodule_list.append(submodule)
repo_commits.append(commit.hexsha)
return submodule_list

    def sync(self) -> bool:
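        """Fetch the main repo and, when submodule support is enabled, recursively
        discover and mirror all submodules. Return True only if every fetch/clone succeeded."""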
if not self.__opened:
self._repo = RepoTool(self.main_repo_path)
if not self._repo.initialized:
return False
# determine recursive behavior
if not self._config.cloner_submodules:
return self._repo.fetch()
fingerprint = self._repo.repo_fingerprint
# recursive now
if not self._repo.fetch():
log.critical(f"Repo fetch failed for {self._config.cloner_project_name}")
return False
if fingerprint == self._repo.repo_fingerprint:
log.info(f"Repo fingerpring unchanged - submodule discovery skipped")
return True
log.debug(f"Loading submodules.cache")
submodules = DiskStoredList(os.path.join(self.__submodule_cache, "submodules.cache"))
log.debug(f"Loaded submodules.cache - {len(submodules)} items")
path = gen_repo_hashed_name(self._config.cloner_repo_url)
log.debug(f"Main repo hashed name to load: {path}")
# recursion limit?
scan_depth = self._config.cloner_submodule_depth
log.debug(f"Scan depth is {scan_depth} commits")
if scan_depth == 0:
log.debug(f"Repository scan depth is not limited! -> setting scan_depth to none")
scan_depth = None
submodules = Cloner.check_submodules_repo(
self._repo, os.path.join(self.__submodule_cache, path), submodules, scan_depth)
everything_succeed: bool = True
everything_checked: bool = False
fetched_repos = set()
while not everything_checked:
# recursively scan and clone repositories
everything_checked = True
# for every url in list
            # list() is needed - otherwise RuntimeError: collection changed size during iteration
for url in list(submodules):
if url not in fetched_repos:
everything_checked = False
# generate new path
directory = os.path.dirname(self.main_repo_path)
submodule_cloner = RepoTool(os.path.join(directory, gen_repo_hashed_name(url)))
# clone or checkout?
if not submodule_cloner.initialized:
log.info(f"New uninitialized submodule found: {url}. Cloning...")
checked: bool = submodule_cloner.clone(url)
else:
checked: bool = submodule_cloner.fetch()
                    # mark as fetched even when the clone/fetch failed, otherwise this while loop would never end
fetched_repos.add(url)
if not checked:
log.critical(f"Clone/fetch of submodule: {url} failed")
everything_succeed = False
continue
submodules = Cloner.check_submodules_repo(
submodule_cloner,
os.path.join(self.__submodule_cache, gen_repo_hashed_name(url)),
submodules, scan_depth)
return everything_succeed

    def perform_check(self):
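        """Run sync() if the configured check interval has elapsed."""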
log.info(f"Started check for {self._config.cloner_project_name}, url: {self._config.cloner_repo_url}")
if self.check_interval():
self.sync()
log.info(f"Check finished")

    def clone(self, url: Optional[str] = None) -> bool:
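        """Clone url (or the configured cloner_repo_url) into a fresh mirror;
        refuse if an initialized repo already exists at the target path."""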
        # optional parameter - otherwise use the configured URL
if not url:
url = self._config.cloner_repo_url
# generate path
path = self._repo_path_by_url(url)
self._repo_url = url
self._repo = RepoTool(path)
        # refuse to clone over an already initialized repo
if self._repo.initialized:
log.critical(f"Repo path {path} is initialized... Refusing clone!")
return False
# recursive or standard?
if not self._config.cloner_submodules:
return self._repo.clone(url)
else:
scan_depth_limit = self._config.cloner_submodule_depth
            # handle depth limit for submodule discovery
if scan_depth_limit == 0:
scan_depth_limit = None
            # deeper levels are handled internally by RepoTool as non-recursive clones and discovery passes
return self._repo.clone_recursive(url, self.__submodule_cache, scan_depth = scan_depth_limit)

    @property
def detector_enabled(self) -> bool:
log.debug(f"Querying detector config file")
return os.path.exists(os.path.join(self._dirs.conf_dir, self.__detector_cfg))

    def detector_run(self, callback: Callable[[DetectedCommit], None]):
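        """Run change detection on the main repo, invoking callback for each
        DetectedCommit when the fingerprint check passes."""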
detector = Detector(Path(self.main_repo_path), Path(self._dirs.cache_dir), self._config.cloner_project_name)
if detector.check_fingerprint():
log.debug(f"Starting detector discovery")
detector.run(callback)

    def detector_init(self):
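        """Initialize the detector's caches for the main repository."""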
detector = Detector(Path(self.main_repo_path), Path(self._dirs.cache_dir), self._config.cloner_project_name)
detector.initialize_caches()
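
# Usage sketch (assumptions: RepoDirStructure is constructable from a project
# directory path; its exact signature is not shown in this file):
#
#     dirs = RepoDirStructure("/srv/mirrors/my-project")
#     cloner = Cloner(dirs)
#     if not cloner.open(dirs.config.cloner_repo_url):
#         cloner.clone()
#     cloner.perform_check()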