diff --git a/repo_cloner/lib/repo_tool.py b/repo_cloner/lib/repo_tool.py index 084ae14..211856c 100644 --- a/repo_cloner/lib/repo_tool.py +++ b/repo_cloner/lib/repo_tool.py @@ -2,12 +2,13 @@ import os.path import git from git import Repo -from git.exc import NoSuchPathError, InvalidGitRepositoryError +from git.exc import NoSuchPathError, InvalidGitRepositoryError, GitCommandError from git import RemoteProgress import logging import time from typing import Union, Optional from repo_cloner.lib.checksum import gen_repo_hashed_name +from io import BytesIO log = logging.getLogger("rc.repo") @@ -140,6 +141,12 @@ class RepoTool: def discovered_submodules_commits(self) -> list: return self._submodule_discovery_history + def _persist_submodule_commits(self, path: str): + with open(path, "w") as f: + for commit in self.discovered_submodules_commits: + line = f"{commit}\n" + f.write(line) + def __check_initialized(self): def inner(*args): fake_self: RepoTool = args[0] @@ -156,13 +163,20 @@ class RepoTool: return False log.info(f"Cloning repository from url: {url}") - self._repo = Repo.clone_from( - url, - to_path = self._path, - progress = GitRemoteProgress(), - bare = True, - mirror = True - ) + try: + self._repo = Repo.clone_from( + url, + to_path = self._path, + progress = GitRemoteProgress(), + bare = True, + mirror = True + ) + except GitCommandError as e: + log.critical(f"Clone of {url} failed!") + log.critical(f"Exception: {e}") + self._initialized = False + return False + self._initialized = True self._bare = self._repo.bare @@ -207,14 +221,16 @@ class RepoTool: if '.gitmodules' in commit.tree: git_modules: git.Blob = commit.tree['.gitmodules'] - from io import BytesIO x = BytesIO(git_modules.data_stream.read()) x.name = git_modules.name - parser = git.GitConfigParser(x, read_only = True) - for section in parser.sections(): - if parser.has_option(section, "url"): - submodules.append(parser.get_value(section, "url")) + try: + parser = git.GitConfigParser(x, read_only = 
True) + for section in parser.sections(): + if parser.has_option(section, "url"): + submodules.append(parser.get_value(section, "url")) + except BaseException: + return False if commit.hexsha not in self._submodule_discovery_history: self._submodule_discovery_history.append(commit.hexsha) @@ -233,7 +249,8 @@ class RepoTool: for commit in iterator: counter += 1 commit_submodules = self.list_submodules(commit) - submodules.update(commit_submodules) + if not type(commit_submodules) == bool: + submodules.update(commit_submodules) if time.time() >= last_status + status_offset: log.info(f"Submodule discovery: {counter} commits finished, {len(submodules)} discovered") last_status = time.time() @@ -242,7 +259,7 @@ class RepoTool: def clone_recursive( self, main_url: str, - scan_cache_dir: Optional[str] == None, + scan_cache_dir: Optional[str] = None, scan_depth: Optional[int] = None ) -> bool: @@ -286,7 +303,24 @@ class RepoTool: everything_succeed = False continue + # persistor + if scan_cache_dir: + cache_file = os.path.join(scan_cache_dir, gen_repo_hashed_name(url)) + log.debug( + f"Saving {len(submodule_cloner.discovered_submodules_commits)} commits into {cache_file}") + submodule_cloner._persist_submodule_commits(cache_file) + for submodule in submodules: self._recursive_discovery_urls.add(submodule) + if scan_cache_dir: + # persist main repo commits + cache_file = os.path.basename(self.path) + cache_file = os.path.join(scan_cache_dir, cache_file) + self._persist_submodule_commits(cache_file) + + # persist discovered submodule urls + with open(os.path.join(scan_cache_dir, "submodules.cache"), "w") as f: + f.write("\n".join(self.cloned_submodules_url_list)) + return everything_succeed diff --git a/tests/_support_data/.gitignore b/tests/_support_data/.gitignore index 94e3051..e84358e 100644 --- a/tests/_support_data/.gitignore +++ b/tests/_support_data/.gitignore @@ -1,2 +1,2 @@ tool_repos - +test-submodules-adhoc diff --git a/tests/_support_data/gen-data.sh 
b/tests/_support_data/gen-data.sh index 457157c..1c6569d 100755 --- a/tests/_support_data/gen-data.sh +++ b/tests/_support_data/gen-data.sh @@ -25,3 +25,68 @@ do done # recover submodules to initial state (branches will remain) git submodule update --init + +if [ -d test-submodules-adhoc ] +then + echo "Removing adhoc submodules" + rm -Rf test-submodules-adhoc + fi + +echo "Creating adhoc submodules" +mkdir test-submodules-adhoc +cd test-submodules-adhoc + +# git submodules level two +git init submodules-level-two +cd submodules-level-two +git config user.name Tester +git config user.email test@tester.me +git submodule add $new_pwd/test-repo-base +git submodule add $new_pwd/test-repo-reduced +git add -A +git commit -m "first commit" +cd .. + +# git submodules root level +git init submodules-root +cd submodules-root +git config user.name Tester +git config user.email test@tester.me +git submodule add $new_pwd/test-repo-base +git submodule add $new_pwd/test-repo-different-tags +git add -A +git commit -m "initial commit" +git submodule add $new_pwd/test-repo-new-commits +git submodule add $new_pwd/test-submodules-adhoc/submodules-level-two +git add -A +git commit -m "added submodule recursion" +cd .. + + +# submodule with failed url +git init submodule-failed-cfg +cd submodule-failed-cfg +git config user.name Tester +git config user.email test@tester.me +echo "# komentář" > .gitmodules +dd if=/dev/urandom bs=20 count=1 > .gitmodules +git add -A +git commit -m "failed .gitmodules" +cd .. + +# submodule root with two fucked-up submodules +git init submodule-failed +cd submodule-failed +git config user.name Tester +git config user.email test@tester.me +git submodule add $new_pwd/test-submodules-adhoc/submodule-failed-cfg +git submodule add $new_pwd/test-repo-base +git submodule deinit test-repo-base +sed -i 's/_support_data\/test-repo-base/_support_\/test-repo-base/g' .gitmodules +git add -A +git commit -m "submodules created, but would fail" +cd .. 
+ + +# return back +cd $new_pwd diff --git a/tests/lib/test_repo_tool.py b/tests/lib/test_repo_tool.py index 66ba215..18d0a44 100644 --- a/tests/lib/test_repo_tool.py +++ b/tests/lib/test_repo_tool.py @@ -5,6 +5,7 @@ import pytest from cloner_test_fixtures import support_data_path, path_repo_base, path_repo_changed_branches, \ path_repo_different_tags, path_repo_new_commits, path_repo_reduced from pathlib import Path +from repo_cloner.lib.checksum import gen_repo_hashed_name from repo_cloner.lib.repo_tool import RepoTool base_repo_branches = [ @@ -97,7 +98,7 @@ def test_discovered_submodules_commits(tmp_path, monkeypatch): 'a22b74fba976631f123d4b2348aba531cf6430fd', 'b1b0554e60fc5f0feb542bf54d1cadbc1d0418d6', 'd0c808ab0fc075497cb50d9c704b024bcc6cfa95', - 'f8e168561a824da72f7d441932e77f3912039f9a,', + 'f8e168561a824da72f7d441932e77f3912039f9a', '8a150c63c5b688f39db15769db5c7d7c0fd52349', ] rt = RepoTool(tmp_path.as_posix()) @@ -106,6 +107,21 @@ def test_discovered_submodules_commits(tmp_path, monkeypatch): assert rt.discovered_submodules_commits == commits +def test__persist_submodule_commits(tmp_path, monkeypatch): + commits = [ + 'a22b74fba976631f123d4b2348aba531cf6430fd', + 'b1b0554e60fc5f0feb542bf54d1cadbc1d0418d6', + 'd0c808ab0fc075497cb50d9c704b024bcc6cfa95', + 'f8e168561a824da72f7d441932e77f3912039f9a', + '8a150c63c5b688f39db15769db5c7d7c0fd52349', + ] + rt = RepoTool(tmp_path.as_posix()) + monkeypatch.setattr(rt, "_submodule_discovery_history", commits) + rt._persist_submodule_commits(tmp_path.joinpath("cache").as_posix()) + saved = tmp_path.joinpath("cache").read_text() + assert commits == saved.strip().split("\n") + + def test_clone_initialized_repo(tmp_path, caplog, support_data_path): from git import Repo # initialize repo @@ -464,6 +480,12 @@ def test_list_submodules_ok(tmp_path, support_data_path): ] +def test_list_submodules_exception(support_data_path): + repo = support_data_path.joinpath("test-submodules-adhoc", "submodule-failed-cfg").as_posix() + 
rt = RepoTool(repo) + assert not rt.list_submodules() + + def test_list_submodules_history(tmp_path, support_data_path): rt = RepoTool(tmp_path.joinpath("repo.git").as_posix()) rt.clone(support_data_path.joinpath("test-repo-submodules").as_uri()) @@ -510,3 +532,74 @@ def test_list_submodules_history_progress(support_data_path, caplog, monkeypatch assert 7 == sum(1 if regex.match(x.message) else 0 for x in caplog.records) assert len(rt.discovered_submodules_commits) == 22 + + +def test_clone_recursive_failed_repo(tmp_path, monkeypatch, caplog): + rt = RepoTool(tmp_path.joinpath("r.git").as_posix()) + monkeypatch.setattr(rt, "clone", lambda x: False) + caplog.clear() + assert not rt.clone_recursive(tmp_path.as_uri()) + assert caplog.records[0].levelname == "CRITICAL" + assert caplog.records[0].message == "Clone of main repository failed!" + + +def test_clone_with_recursive_repos(tmp_path, support_data_path): + main_repo = support_data_path.joinpath("test-submodules-adhoc", "submodules-root") + rt = RepoTool(tmp_path.joinpath("r.git").as_posix()) + assert rt.clone_recursive(main_repo.as_uri()) + + submodules_should_be_cloned = [ + 'test-repo-reduced', + 'test-submodules-adhoc/submodules-level-two', + 'test-repo-different-tags', + 'test-repo-base', + 'test-repo-new-commits' + ] + assert len(submodules_should_be_cloned) == len(rt.cloned_submodules_url_list) + for submodule in submodules_should_be_cloned: + path = support_data_path.joinpath(submodule).as_posix() + base = gen_repo_hashed_name(path) + assert tmp_path.joinpath(base).is_dir() + + +def test_clone_with_recursion_persist(tmp_path, support_data_path): + main_repo = support_data_path.joinpath("test-submodules-adhoc", "submodules-root") + rt = RepoTool(tmp_path.joinpath("r.git").as_posix()) + cache = tmp_path.joinpath("___") + cache.mkdir() + assert rt.clone_recursive(main_repo.as_uri(), cache.as_posix()) + + submodules_should_be_cloned = [ + ('test-repo-reduced', 327), + 
('test-submodules-adhoc/submodules-level-two', 1), + ('test-repo-different-tags', 339), + ('test-repo-base', 339), + ('test-repo-new-commits', 345), + ] + assert len(submodules_should_be_cloned) == len(rt.cloned_submodules_url_list) + + for submodule, refs in submodules_should_be_cloned: + path = support_data_path.joinpath(submodule).as_posix() + base = gen_repo_hashed_name(path) + assert tmp_path.joinpath(base).is_dir() + + # test cache file + assert cache.joinpath(base).is_file() + lines = cache.joinpath(base).read_text().strip().split("\n") + assert len(lines) == refs + + # root repo cache + assert cache.joinpath("r.git").is_file() + assert 2 == len(cache.joinpath("r.git").read_text().strip().split("\n")) + + +def test_clone_with_recursion_fail(tmp_path, support_data_path, caplog): + main_repo = support_data_path.joinpath("test-submodules-adhoc", "submodule-failed") + rt = RepoTool(tmp_path.joinpath("r.git").as_posix()) + assert not rt.clone_recursive(main_repo.as_uri()) + assert f"Clone of {support_data_path.parent.as_posix()}/_support_/test-repo-base failed!" in caplog.text + assert f"Exception: Cmd('git') failed due to: exit code(128)" in caplog.text + assert set(rt.cloned_submodules_url_list) == { + f"{support_data_path.parent.as_posix()}/_support_/test-repo-base", + f"{support_data_path.as_posix()}/test-submodules-adhoc/submodule-failed-cfg" + }