From 6463a6bb954beeef3200e35d21f0a4c4416165b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Val=C3=AD=C4=8Dek?= Date: Fri, 29 Jul 2022 17:03:15 +0200 Subject: [PATCH] Update, repo tool cloner: recursive clones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Václav Valíček --- old/checker/run-checker | 26 --------- old/src/mirror-main-repo | 32 +---------- repo_cloner/lib/repo_tool.py | 80 ++++++++++++++++++++++++++- repo_cloner/process_repository_dir.py | 33 +++++++++-- run-test | 2 +- tests/lib/test_repo_tool.py | 34 +++++++++++- 6 files changed, 142 insertions(+), 65 deletions(-) diff --git a/old/checker/run-checker b/old/checker/run-checker index 7158f42..de2517d 100755 --- a/old/checker/run-checker +++ b/old/checker/run-checker @@ -8,34 +8,10 @@ mydir=$(dirname $(realpath $0)) source $mydir/detector-lib-cfg - -# interval - in minutes -interval=${cloner_interval:-0} -stampfile=$CCLONE_CACHE/last-check-time - -# does it exist - if not, sync -[ -d $CCLONE_CACHE ] || mkdir $CCLONE_CACHE -[ -f $stampfile ] || echo 0 > $stampfile - - -now=$(date +"%s") -last=$(cat $stampfile) -diff=$(($now - $last)) -mindiff=$(($interval * 60)) -unset now last - -if [ $diff -lt $mindiff ] -then - echo "Limit not reached - not syncing now" - exit 0 -fi - # check and clone repo submodules=${cloner_submodules:-0} depth=${cloner_submodule_depth:-} -export HOME=$CCLONE_CACHE - prepareGitAuth $CONFIG_DIR # without submodule support @@ -46,8 +22,6 @@ else mirror-recursive $repo $depth fi -date +"%s" > $stampfile - # if detector is not enabled, quit quietly if ! detectorRunCapable then diff --git a/old/src/mirror-main-repo b/old/src/mirror-main-repo index dfca905..c599131 100755 --- a/old/src/mirror-main-repo +++ b/old/src/mirror-main-repo @@ -13,38 +13,12 @@ IFS=$'\n\t' source $(dirname $(realpath $0))/gen-mirror-path - function updateOrCreate(){ - url=$1 - repodir=$(getRepoPath $url) - - if [ ! 
-d $repodir ]
-    then
-        echo "Clone of $url"
-        git clone --bare --mirror $url $repodir
-        # create FETCH_HEAD needed by other scripts
-        cd $repodir
-        git fetch --prune
-    else
-        cd $repodir
-        echo "Update of $url"
-        git fetch --prune
-    fi
+    # implemented in RepoTool
 }
 
 function getLastCommit(){
-    url=$1
-    repodir=$(getRepoPath $url)
-    if [ -d $repodir ]
-    then
-        cd $repodir
-        git --no-pager log --full-history --all -1 --pretty=format:"%H%n"
-    else
-        echo '-'
-    fi
-
+    # replaced by git_tool repo hash
 }
 
-oldPwd=$(pwd)
-updateOrCreate $1
-cd $oldPwd
+updateOrCreate $1
\ No newline at end of file
diff --git a/repo_cloner/lib/repo_tool.py b/repo_cloner/lib/repo_tool.py
index 8a84582..084ae14 100644
--- a/repo_cloner/lib/repo_tool.py
+++ b/repo_cloner/lib/repo_tool.py
@@ -1,3 +1,5 @@
+import os.path
+
 import git
 from git import Repo
 from git.exc import NoSuchPathError, InvalidGitRepositoryError
@@ -5,6 +7,7 @@ from git import RemoteProgress
 import logging
 import time
 from typing import Union, Optional
+from repo_cloner.lib.checksum import gen_repo_hashed_name
 
 log = logging.getLogger("rc.repo")
 
@@ -64,7 +67,7 @@ class GitRemoteProgress(RemoteProgress):
 
         # so check timer
         if (self.last_step_time + self.time_thr) > time.time():
-            # timer not passed yet
+            # timer not passed yet
             if not ((op_code & self.BEGIN) or (op_code & self.END)):
                 # skip -> no begin or end
                 return
@@ -97,10 +100,16 @@ class RepoTool:
     _bare: bool = False
     _path: str = ""
     _last_fetch_data = []
+    _recursive_discovery_urls: set = set()
+    _recursive_discovery_cloned: set = set()
+    _submodule_discovery_history: list = []
 
     def __init__(self, path: str):
         log.info(f"Initializing repository at {path}")
         self._path = str(path)
+        self._recursive_discovery_cloned = set()
+        self._recursive_discovery_urls = set()
+        self._submodule_discovery_history: list = []
         try:
             self._repo = Repo(path, expand_vars = False)
             self._initialized = True
@@ -123,6 +132,14 @@
     def path(self) -> str:
         return self._path
 
+    @property
+    def cloned_submodules_url_list(self) -> list:
+        return list(self._recursive_discovery_cloned)
+
+    @property
+    def discovered_submodules_commits(self) -> list:
+        return self._submodule_discovery_history
+
     def __check_initialized(self):
         def inner(*args):
             fake_self: RepoTool = args[0]
@@ -183,7 +200,8 @@
         log.debug(f"Repo fingerprint is {x}")
         return x
 
-    def list_submodules(self, commit: str = "HEAD") -> list:
+    @__check_initialized
+    def list_submodules(self, commit: str = "HEAD") -> Union[list, bool]:
         commit = self._repo.commit(commit)
 
         submodules = []
@@ -198,9 +216,13 @@
             if parser.has_option(section, "url"):
                 submodules.append(parser.get_value(section, "url"))
 
+        if commit.hexsha not in self._submodule_discovery_history:
+            self._submodule_discovery_history.append(commit.hexsha)
+
         return submodules
 
-    def list_submodules_history(self, limit_of_commits: Optional[int] = None):
+    @__check_initialized
+    def list_submodules_history(self, limit_of_commits: Optional[int] = None) -> Union[list, bool]:
         log.info(f"Listing repository submodule history")
 
         iterator = self._repo.iter_commits(all = True, max_count = limit_of_commits)
@@ -216,3 +238,55 @@
                 log.info(f"Submodule discovery: {counter} commits finished, {len(submodules)} discovered")
                 last_status = time.time()
         return list(submodules)
+
+    def clone_recursive(
+            self,
+            main_url: str,
+            scan_cache_dir: Optional[str] = None,
+            scan_depth: Optional[int] = None
+    ) -> bool:
+
+        
log.info(f"Started recursive clone of {main_url} with recursive discovery limited to {scan_depth} commits") + # clone main repo + if not self.clone(main_url): + log.critical(f"Clone of main repository failed!") + return False + + # discover submodules for repository + submodules = self.list_submodules_history(scan_depth) + if submodules: + for submodule in submodules: + self._recursive_discovery_urls.add(submodule) + + everything_succeed: bool = True + everything_cloned: bool = False + while not everything_cloned: + # recursively scan and clone repositories + everything_cloned = True + # for every url in list + # list() is needed - Runtime Error for set() changed during iteration + for url in list(self._recursive_discovery_urls): + if url not in self._recursive_discovery_cloned: + everything_cloned = False + # generate new path + directory = os.path.dirname(self.path) + submodule_cloner = RepoTool(os.path.join(directory, gen_repo_hashed_name(url))) + # clone + cloned: bool = submodule_cloner.clone(url) + # mark cloned even if failed afterwards - while loop stuck solution + self._recursive_discovery_cloned.add(url) + if not cloned: + log.critical(f"Clone of submodule: {url} failed") + everything_succeed = False + continue + # scan for submodules + submodules = submodule_cloner.list_submodules_history(scan_depth) + if type(submodules) == bool and not submodules: + log.critical(f"Submodule discovery for {url} failed!") + everything_succeed = False + continue + + for submodule in submodules: + self._recursive_discovery_urls.add(submodule) + + return everything_succeed diff --git a/repo_cloner/process_repository_dir.py b/repo_cloner/process_repository_dir.py index eff91e1..288703c 100755 --- a/repo_cloner/process_repository_dir.py +++ b/repo_cloner/process_repository_dir.py @@ -6,9 +6,6 @@ import os import logging as l -# l.basicConfig(level = 0) - -# create console handler with a higher log level console_logger = l.StreamHandler() console_formatter = l.Formatter( "%(asctime)-15s :: [%(levelname)8s] :: %(name)-15s :: %(message)s (%(filename)s:%(lineno)s)", @@ -20,7 +17,11 @@ log.addHandler(console_logger) log.setLevel(logging.DEBUG) from repo_cloner.lib.repo_dir_structure import RepoDirStructure +from repo_cloner.lib.cloner import Cloner +from repo_cloner.lib.repo_tool import RepoTool from git.config import GitConfigParser +from git.repo import Repo +from typing import Union def config_try_override(config_writer: GitConfigParser, section: str, option: str, value: str): @@ -36,6 +37,10 @@ def config_try_override(config_writer: GitConfigParser, section: str, option: st def main() -> int: + def update(op_code: int, cur_count: Union[str, float], max_count: Union[str, float, None] = None, + message: str = ''): + log.debug(f"op: {op_code}; cur: {cur_count}/{max_count}; mess: {message}") + # parse input arguments parser = argparse.ArgumentParser(description = "repo-cloner entering script") parser.add_argument('--base-dir', help = 'path to directory containing whole cloner structure', required = True, @@ -72,11 +77,29 @@ def main() -> int: if len(config.cloner_project_name) == 0: log.warning("Config directive cloner_project_name should not be omitted!") + # cloner = Cloner(dirs) + # cloner.check_interval() + + import subprocess + subprocess.run(["/usr/bin/rm", "-Rf", "/tmp/test/repos"]) + subprocess.run(["/usr/bin/mkdir", "/tmp/test/repos"]) + + rt = RepoTool("/tmp/test/repos/main.git") + # rt.clone("https://github.com/u-boot/u-boot.git") + x = 
rt.clone_recursive("file:///home/vasek/dev/repo-cloner/tests/_support_data/test-repo-submodules-multilevel")
+    print(x)
+
+    # url = ""
+    # for x in rt._repo.remote("origin").urls:
+    #     url = x
+    # url = url.replace("test-repo-base", "test-repo-reduced")
+    # url = url.replace("test-repo-base", "test-repo-changed-branches")
+    # rt._repo.remote("origin").set_url(url)
 
     return 0
 
-    from git import Repo
-    r = Repo("/home/vasek/dev/repo-cloner")
+    # from git import Repo
+    r = Repo("file:///home/vasek/dev/repo-cloner")
     path: str = r._get_config_path("user")
     print(path)
     path = os.path.dirname(path)
diff --git a/run-test b/run-test
index 39e10d8..ea57bd6 100755
--- a/run-test
+++ b/run-test
@@ -14,6 +14,6 @@ then
         --cov-report html \
         --capture=no
 else
-    python3 -m pytest --capture=no $1
+    python3 -m pytest --capture=no -v $1
 fi
 
diff --git a/tests/lib/test_repo_tool.py b/tests/lib/test_repo_tool.py
index 2f10e4d..66ba215 100644
--- a/tests/lib/test_repo_tool.py
+++ b/tests/lib/test_repo_tool.py
@@ -79,12 +79,33 @@ def test_bare(tmp_path, monkeypatch):
 
 
 def test_path(tmp_path, monkeypatch):
-    rt = RepoTool(tmp_path)
+    rt = RepoTool(tmp_path.as_posix())
     assert tmp_path.as_posix() == rt.path
     monkeypatch.setattr(rt, "_path", "/tmp")
     assert "/tmp" == rt.path
 
 
+def test_cloned_submodules_url_list(tmp_path, monkeypatch):
+    rt = RepoTool(tmp_path.as_posix())
+    assert rt.cloned_submodules_url_list == []
+    monkeypatch.setattr(rt, "_recursive_discovery_cloned", {"https://repo.git/1", "git@hosting:/name/repo.git"})
+    assert rt.cloned_submodules_url_list == list({"https://repo.git/1", "git@hosting:/name/repo.git"})
+
+
+def test_discovered_submodules_commits(tmp_path, monkeypatch):
+    commits = [
+        'a22b74fba976631f123d4b2348aba531cf6430fd',
+        'b1b0554e60fc5f0feb542bf54d1cadbc1d0418d6',
+        'd0c808ab0fc075497cb50d9c704b024bcc6cfa95',
+        'f8e168561a824da72f7d441932e77f3912039f9a',
+        '8a150c63c5b688f39db15769db5c7d7c0fd52349',
+    ]
+    rt = RepoTool(tmp_path.as_posix())
+    assert rt.discovered_submodules_commits == []
+    monkeypatch.setattr(rt, "_submodule_discovery_history", commits)
+    assert rt.discovered_submodules_commits == commits
+
+
 def test_clone_initialized_repo(tmp_path, caplog, support_data_path):
     from git import Repo
     # initialize repo
@@ -423,6 +444,7 @@ def test_fingerprint(support_data_path: Path, repo: str, hash):
 
 def test_list_submodules_no_submodules(cloned_base_repo_obj):
     assert cloned_base_repo_obj.list_submodules() == []
+    assert cloned_base_repo_obj.discovered_submodules_commits == ["e0c7e2a72579e24657c05e875201011d2b48bf94"]
 
 
 def test_list_submodules_ok(tmp_path, support_data_path):
@@ -435,6 +457,12 @@
         'https://git.sw3.cz/kamikaze/test-repo-reduced.git'
     ]
 
+    assert rt.cloned_submodules_url_list == []
+    assert rt.discovered_submodules_commits == [
+        '1946eeb4dda03473e796a8cc78b1946fc85df0fd',
+        'cc58d514348d0d2c8f0b75ad1f7ff96eb02781d5',
+    ]
+
 
 def test_list_submodules_history(tmp_path, support_data_path):
     rt = RepoTool(tmp_path.joinpath("repo.git").as_posix())
@@ -457,6 +485,8 @@
         'https://git.sw3.cz/kamikaze/test-repo-reduced.git',
     ]
 
+    assert len(rt.discovered_submodules_commits) == 645
+
 
 def test_list_submodules_history_progress(support_data_path, caplog, monkeypatch):
     mocked_time = 1659059078
@@ -478,3 +508,5 @@
     regex = re.compile("Submodule discovery: \\d+ commits finished, 1 discovered")
     assert 8 == 
len(caplog.records) assert 7 == sum(1 if regex.match(x.message) else 0 for x in caplog.records) + + assert len(rt.discovered_submodules_commits) == 22
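
Usage sketch (illustrative, not part of the patch): the lines below show how the new RepoTool.clone_recursive() API added by this commit might be driven, assuming the patch is applied. The wrapper name mirror_recursively, the mirror directory layout and the scan_depth default are assumptions for the example; the RepoTool import path, the clone_recursive() signature and the two new properties come from the patch itself.

    from repo_cloner.lib.repo_tool import RepoTool

    def mirror_recursively(url: str, mirror_dir: str, scan_depth: int = 500) -> bool:
        # Hypothetical wrapper: the main repository is mirrored into <mirror_dir>/main.git.
        # clone_recursive() then places submodule mirrors next to it, named via gen_repo_hashed_name().
        rt = RepoTool(f"{mirror_dir}/main.git")
        ok = rt.clone_recursive(url, scan_depth = scan_depth)
        # The new read-only properties expose what the recursive discovery found.
        print(f"cloned submodule urls: {rt.cloned_submodules_url_list}")
        print(f"commits scanned for .gitmodules: {len(rt.discovered_submodules_commits)}")
        return ok

The discovery loop in clone_recursive() iterates until no unprocessed URL remains; URLs are marked as cloned even when their clone fails, which sacrifices retries but guarantees the loop terminates.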