from repo_cloner.lib import gen_repo_hashed_name
from repo_cloner.lib import DirNotFoundError
from repo_cloner.lib import ClonerConfig, DiskStoredList, RepoDirStructure, RepoTool, DetectedCommit, Detector
from pathlib import Path
from typing import Optional, Callable
from time import time
import os
import logging

log = logging.getLogger("rc.cloner")


class Cloner:
    _dirs: RepoDirStructure = None
    _config: ClonerConfig = None
    _interval_file: str = "last-check-time"
    __detector_cfg = "detector.cfg"
    __submodule_cache: str = None
    _repo: RepoTool = None
    _repo_url: str = ""

    def __init__(self, dir_structure: RepoDirStructure):
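        """Bind the cloner to a RepoDirStructure, validate the configured repo URL
        and make sure the cache and submodule-cache directories exist."""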
        self._dirs = dir_structure
        self._config = self._dirs.config
        if len(self._config.cloner_repo_url) == 0:
            log.critical("Undefined repo cloner URL in config!")
            raise KeyError("cloner_repo_url not defined in config!")

        # create cache dir, if missing
        try:
            # cache_dir_exists is expected to raise DirNotFoundError when the directory is missing
            assert self._dirs.cache_dir_exists
        except DirNotFoundError:
            log.info(f"Cache dir for project {self._config.cloner_project_name} not found -> creating")
            Path(self._dirs.cache_dir).mkdir()
            log.debug("Cache dir created")

        # submodule cache
        self.__submodule_cache = os.path.join(self._dirs.cache_dir, "submodules")
        if not os.path.exists(self.__submodule_cache):
            log.info("Submodule cache dir does not exist! -> creating")
            Path(self.__submodule_cache).mkdir(parents = True)

    def check_interval(self) -> bool:
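        """Return True when more than cloner_interval minutes have passed since the
        timestamp stored in the interval file (treated as 0 when missing or corrupt)."""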
        log.debug(f"Checking interval for {self._config.cloner_project_name}")
        # get interval
        interval = self._config.cloner_interval
        # interval file?
        interval_file: Path = Path(self._dirs.cache_dir).joinpath(self._interval_file)
        log.debug(f"Interval file: {interval_file}")
        file_stamp: int = 0
        if interval_file.exists():
            str_val = interval_file.read_text()
            try:
                file_stamp = int(str_val)
            except ValueError:
                log.warning("Interval file is corrupted -> treating repo as never checked")
        # check time
        return time() > file_stamp + interval * 60

    def open(self, url: str) -> bool:
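        """Point the cloner at the local clone of url and report whether an initialized repo exists there."""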
        log.debug(f"Opening repo with url: {url}")
        repo_path = self._repo_path_by_url(url)
        self._repo_url = url
        self._repo = RepoTool(repo_path)
        return self.__opened

    @property
    def __opened(self) -> bool:
        if not self._repo:
            return False
        return self._repo.initialized

    def _repo_path_by_url(self, url: str) -> str:
        hashed_name: str = gen_repo_hashed_name(url)
        log.debug(f"Repo hashed name for {url} is {hashed_name}")
        return os.path.join(self._dirs.repos_dir, hashed_name)

    @property
    def __main_repo_path(self) -> str:
        return self._repo_path_by_url(self._config.cloner_repo_url)

    @classmethod
    def check_submodules_repo(cls, repo_tool: RepoTool, cache_file: str, submodule_list: DiskStoredList,
                              scan_depth: Optional[int]) -> DiskStoredList:
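        """Scan commits of repo_tool (up to scan_depth commits) for submodule URLs
        not yet present in submodule_list.

        Already-inspected commits are cached in cache_file, so each commit is scanned
        only once across runs. Returns the updated submodule_list.
        """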
        base = os.path.basename(repo_tool.path)
        log.debug(f"Loading submodule cache for repo {base}")
        repo_commits = DiskStoredList(cache_file)
        log.debug(f"Loaded {len(repo_commits)} commits")

        # list commits of the fetched repo
        log.debug(f"Scanning repo {base} for new submodules")
        new_commits = repo_tool.list_commits(scan_depth)

        # discover new submodules in new commits
        for commit in new_commits:
            log.debug(f"Discovering submodules in {commit.hexsha}")
            if commit.hexsha in repo_commits:
                log.debug("Cached commit... Okay")
                continue
            discovered = repo_tool.list_submodules(commit)
            if discovered:
                log.debug(f"Commit refers to {len(discovered)} submodules")
                for submodule in discovered:
                    if submodule not in submodule_list:
                        log.warning(f"Found new submodule: {submodule}")
                        submodule_list.append(submodule)
            repo_commits.append(commit.hexsha)
        return submodule_list

    def sync(self) -> bool:
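        """Fetch the main repo and, when submodule support is enabled, discover and
        fetch every referenced submodule repo.

        Returns False if the main fetch fails or any submodule clone/fetch fails.
        """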
        if not self.__opened:
            self._repo = RepoTool(self.__main_repo_path)
            if not self._repo.initialized:
                return False

        # determine recursive behavior
        if not self._config.cloner_submodules:
            return self._repo.fetch()

        # remember the pre-fetch fingerprint so unchanged repos can skip submodule discovery
        fingerprint = self._repo.repo_fingerprint

        # recursive now
        if not self._repo.fetch():
            log.critical(f"Repo fetch failed for {self._config.cloner_project_name}")
            return False

        if fingerprint == self._repo.repo_fingerprint:
            log.info("Repo fingerprint unchanged - submodule discovery skipped")
            return True

        log.debug("Loading submodules.cache")
        submodules = DiskStoredList(os.path.join(self.__submodule_cache, "submodules.cache"))
        log.debug(f"Loaded submodules.cache - {len(submodules)} items")
        path = gen_repo_hashed_name(self._config.cloner_repo_url)
        log.debug(f"Main repo hashed name to load: {path}")

        # recursion limit?
        scan_depth = self._config.cloner_submodule_depth
        log.debug(f"Scan depth is {scan_depth} commits")
        if scan_depth == 0:
            log.debug("Repository scan depth is not limited! -> setting scan_depth to None")
            scan_depth = None

        submodules = Cloner.check_submodules_repo(
            self._repo, os.path.join(self.__submodule_cache, path), submodules, scan_depth)

        everything_succeed: bool = True
        everything_checked: bool = False
        fetched_repos = set()
        while not everything_checked:
            # recursively scan and clone repositories
            everything_checked = True
            # for every url in list
            # list() is needed - otherwise "RuntimeError: set changed size during iteration"
            for url in list(submodules):
                if url not in fetched_repos:
                    everything_checked = False
                    # generate new path
                    directory = os.path.dirname(self.__main_repo_path)
                    submodule_cloner = RepoTool(os.path.join(directory, gen_repo_hashed_name(url)))
                    # clone or fetch?
                    if not submodule_cloner.initialized:
                        log.info(f"New uninitialized submodule found: {url}. Cloning...")
                        checked: bool = submodule_cloner.clone(url)
                    else:
                        checked: bool = submodule_cloner.fetch()

                    # mark as fetched even if a later step fails, so the while loop cannot get stuck
                    fetched_repos.add(url)
                    if not checked:
                        log.critical(f"Clone/fetch of submodule: {url} failed")
                        everything_succeed = False
                        continue

                    submodules = Cloner.check_submodules_repo(
                        submodule_cloner,
                        os.path.join(self.__submodule_cache, gen_repo_hashed_name(url)),
                        submodules, scan_depth)
        return everything_succeed

    def perform_check(self):
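        """Run a sync when the configured check interval has elapsed."""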
        log.info(f"Started check for {self._config.cloner_project_name}, url: {self._config.cloner_repo_url}")
        if self.check_interval():
            self.sync()
        log.info("Check finished")

    def clone(self, url: Optional[str] = None) -> bool:
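        """Clone url (or the configured cloner_repo_url) into its hashed repo directory,
        refusing to clone over an already initialized repo."""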
        # optional parameter - otherwise use config
        if not url:
            url = self._config.cloner_repo_url
        # generate path
        path = self._repo_path_by_url(url)
        self._repo_url = url
        self._repo = RepoTool(path)
        # refuse to clone over an initialized repo
        if self._repo.initialized:
            log.critical(f"Repo path {path} is initialized... Refusing clone!")
            return False

        # recursive or standard?
        if not self._config.cloner_submodules:
            return self._repo.clone(url)
        else:
            scan_depth_limit = self._config.cloner_submodule_depth
            # handle depth limit for submodule discovery
            if scan_depth_limit == 0:
                scan_depth_limit = None
            # deeper levels are handled internally by repo-tool as non-recursive clones and discoveries
            return self._repo.clone_recursive(url, self.__submodule_cache, scan_depth = scan_depth_limit)

    @property
    def detector_enabled(self) -> bool:
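        """True when a detector.cfg file exists in the project's conf dir."""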
        return os.path.exists(os.path.join(self._dirs.conf_dir, self.__detector_cfg))

    def run_detector(self, callback: Callable[[DetectedCommit], None]):
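        """Run the commit Detector over the main repo, forwarding results to callback."""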
        detector = Detector(self.__main_repo_path, self._dirs.cache_dir, self._config.cloner_project_name)
        detector.run(callback)