New tests for cloner.clone_recursive

Signed-off-by: Václav Valíček <valicek1994@gmail.com>
This commit is contained in:
Václav Valíček 2022-07-30 20:29:59 +02:00
parent 85355a4a1e
commit ae34a5e95f
Signed by: valicek
GPG Key ID: FF05BDCA0C73BB31
4 changed files with 209 additions and 17 deletions

View File

@ -2,12 +2,13 @@ import os.path
import git
from git import Repo
from git.exc import NoSuchPathError, InvalidGitRepositoryError
from git.exc import NoSuchPathError, InvalidGitRepositoryError, GitCommandError
from git import RemoteProgress
import logging
import time
from typing import Union, Optional
from repo_cloner.lib.checksum import gen_repo_hashed_name
from io import BytesIO
log = logging.getLogger("rc.repo")
@ -140,6 +141,12 @@ class RepoTool:
def discovered_submodules_commits(self) -> list:
return self._submodule_discovery_history
def _persist_submodule_commits(self, path: str):
    """Dump the discovered submodule commits into a text file.

    Every entry of ``discovered_submodules_commits`` is written on its own
    line. The file is opened in write mode, so previous content is replaced.

    :param path: location of the file to write
    """
    with open(path, "w") as cache:
        cache.writelines(
            f"{entry}\n" for entry in self.discovered_submodules_commits
        )
def __check_initialized(self):
def inner(*args):
fake_self: RepoTool = args[0]
@ -156,13 +163,20 @@ class RepoTool:
return False
log.info(f"Cloning repository from url: {url}")
self._repo = Repo.clone_from(
url,
to_path = self._path,
progress = GitRemoteProgress(),
bare = True,
mirror = True
)
try:
self._repo = Repo.clone_from(
url,
to_path = self._path,
progress = GitRemoteProgress(),
bare = True,
mirror = True
)
except GitCommandError as e:
log.critical(f"Clone of {url} failed!")
log.critical(f"Exception: {e}")
self._initialized = False
return False
self._initialized = True
self._bare = self._repo.bare
@ -207,14 +221,16 @@ class RepoTool:
if '.gitmodules' in commit.tree:
git_modules: git.Blob = commit.tree['.gitmodules']
from io import BytesIO
x = BytesIO(git_modules.data_stream.read())
x.name = git_modules.name
parser = git.GitConfigParser(x, read_only = True)
for section in parser.sections():
if parser.has_option(section, "url"):
submodules.append(parser.get_value(section, "url"))
try:
parser = git.GitConfigParser(x, read_only = True)
for section in parser.sections():
if parser.has_option(section, "url"):
submodules.append(parser.get_value(section, "url"))
except BaseException:
return False
if commit.hexsha not in self._submodule_discovery_history:
self._submodule_discovery_history.append(commit.hexsha)
@ -233,7 +249,8 @@ class RepoTool:
for commit in iterator:
counter += 1
commit_submodules = self.list_submodules(commit)
submodules.update(commit_submodules)
if not type(commit_submodules) == bool:
submodules.update(commit_submodules)
if time.time() >= last_status + status_offset:
log.info(f"Submodule discovery: {counter} commits finished, {len(submodules)} discovered")
last_status = time.time()
@ -242,7 +259,7 @@ class RepoTool:
def clone_recursive(
self,
main_url: str,
scan_cache_dir: Optional[str] == None,
scan_cache_dir: Optional[str] = None,
scan_depth: Optional[int] = None
) -> bool:
@ -286,7 +303,24 @@ class RepoTool:
everything_succeed = False
continue
# persistor
if scan_cache_dir:
cache_file = os.path.join(scan_cache_dir, gen_repo_hashed_name(url))
log.debug(
f"Saving {len(submodule_cloner.discovered_submodules_commits)} commits into {cache_file}")
submodule_cloner._persist_submodule_commits(cache_file)
for submodule in submodules:
self._recursive_discovery_urls.add(submodule)
if scan_cache_dir:
# persist main repo commits
cache_file = os.path.basename(self.path)
cache_file = os.path.join(scan_cache_dir, cache_file)
self._persist_submodule_commits(cache_file)
# persist discovered submodule urls
with open(os.path.join(scan_cache_dir, "submodules.cache"), "w") as f:
f.write("\n".join(self.cloned_submodules_url_list))
return everything_succeed

View File

@ -1,2 +1,2 @@
tool_repos
test-submodules-adhoc

View File

@ -25,3 +25,68 @@ do
done
# recover submodules to initial state (branches will remain)
git submodule update --init
# Remove any existing test-submodules-adhoc directory before it is recreated.
if [ -d test-submodules-adhoc ]
then
    echo "Removing adhoc submodules"
    rm -Rf test-submodules-adhoc
fi
echo "Creating adhoc submodules"
mkdir test-submodules-adhoc
cd test-submodules-adhoc
# git submodules level two
# Repository with a single commit that registers test-repo-base and
# test-repo-reduced as submodules.
# NOTE(review): $new_pwd is defined outside this excerpt - presumably the
# directory holding the test-repo-* repositories; verify.
git init submodules-level-two
cd submodules-level-two
# local identity so that `git commit` works without global git configuration
git config user.name Tester
git config user.email test@tester.me
git submodule add $new_pwd/test-repo-base
git submodule add $new_pwd/test-repo-reduced
git add -A
git commit -m "first commit"
cd ..
# git submodules root level
# Repository with two commits: the first one registers test-repo-base and
# test-repo-different-tags, the second one adds test-repo-new-commits and the
# submodules-level-two repository (which itself contains submodules).
git init submodules-root
cd submodules-root
# local identity so that `git commit` works without global git configuration
git config user.name Tester
git config user.email test@tester.me
git submodule add $new_pwd/test-repo-base
git submodule add $new_pwd/test-repo-different-tags
git add -A
git commit -m "initial commit"
# second commit: one more plain submodule plus the nested submodule repository
git submodule add $new_pwd/test-repo-new-commits
git submodule add $new_pwd/test-submodules-adhoc/submodules-level-two
git add -A
git commit -m "added submodule recursion"
cd ..
# submodule with failed url
# Repository whose only commit contains a .gitmodules file filled with
# random bytes instead of a valid configuration.
git init submodule-failed-cfg
cd submodule-failed-cfg
# local identity so that `git commit` works without global git configuration
git config user.name Tester
git config user.email test@tester.me
# NOTE(review): both commands redirect with '>', so dd truncates the file and
# the comment line written by echo is lost - .gitmodules ends up holding only
# the 20 random bytes. Use '>>' on the dd line if the comment should be kept.
echo "# komentář" > .gitmodules
dd if=/dev/urandom bs=20 count=1 > .gitmodules
git add -A
git commit -m "failed .gitmodules"
cd ..
# root repository with two broken submodules:
#  - submodule-failed-cfg
#  - test-repo-base, whose url in .gitmodules is rewritten by sed below
git init submodule-failed
cd submodule-failed
# local identity so that `git commit` works without global git configuration
git config user.name Tester
git config user.email test@tester.me
git submodule add $new_pwd/test-submodules-adhoc/submodule-failed-cfg
git submodule add $new_pwd/test-repo-base
git submodule deinit test-repo-base
# replace "_support_data/test-repo-base" with "_support_/test-repo-base" in the
# recorded submodule url
# NOTE(review): presumably the rewritten path does not exist, so cloning this
# submodule is expected to fail - confirm
sed -i 's/_support_data\/test-repo-base/_support_\/test-repo-base/g' .gitmodules
git add -A
git commit -m "submodules created, but would fail"
cd ..
# return back
cd $new_pwd

View File

@ -5,6 +5,7 @@ import pytest
from cloner_test_fixtures import support_data_path, path_repo_base, path_repo_changed_branches, \
path_repo_different_tags, path_repo_new_commits, path_repo_reduced
from pathlib import Path
from repo_cloner.lib.checksum import gen_repo_hashed_name
from repo_cloner.lib.repo_tool import RepoTool
base_repo_branches = [
@ -97,7 +98,7 @@ def test_discovered_submodules_commits(tmp_path, monkeypatch):
'a22b74fba976631f123d4b2348aba531cf6430fd',
'b1b0554e60fc5f0feb542bf54d1cadbc1d0418d6',
'd0c808ab0fc075497cb50d9c704b024bcc6cfa95',
'f8e168561a824da72f7d441932e77f3912039f9a,',
'f8e168561a824da72f7d441932e77f3912039f9a',
'8a150c63c5b688f39db15769db5c7d7c0fd52349',
]
rt = RepoTool(tmp_path.as_posix())
@ -106,6 +107,21 @@ def test_discovered_submodules_commits(tmp_path, monkeypatch):
assert rt.discovered_submodules_commits == commits
def test__persist_submodule_commits(tmp_path, monkeypatch):
    """The persisted file lists the discovery history, one commit per line."""
    expected = [
        'a22b74fba976631f123d4b2348aba531cf6430fd',
        'b1b0554e60fc5f0feb542bf54d1cadbc1d0418d6',
        'd0c808ab0fc075497cb50d9c704b024bcc6cfa95',
        'f8e168561a824da72f7d441932e77f3912039f9a',
        '8a150c63c5b688f39db15769db5c7d7c0fd52349',
    ]
    cache_file = tmp_path.joinpath("cache")

    tool = RepoTool(tmp_path.as_posix())
    # inject the history directly instead of running a real discovery
    monkeypatch.setattr(tool, "_submodule_discovery_history", expected)
    tool._persist_submodule_commits(cache_file.as_posix())

    written = cache_file.read_text().strip().split("\n")
    assert expected == written
def test_clone_initialized_repo(tmp_path, caplog, support_data_path):
from git import Repo
# initialize repo
@ -464,6 +480,12 @@ def test_list_submodules_ok(tmp_path, support_data_path):
]
def test_list_submodules_exception(support_data_path):
    """list_submodules() gives a falsy result for the submodule-failed-cfg repo."""
    broken_repo = support_data_path.joinpath(
        "test-submodules-adhoc", "submodule-failed-cfg"
    )
    tool = RepoTool(broken_repo.as_posix())
    result = tool.list_submodules()
    assert not result
def test_list_submodules_history(tmp_path, support_data_path):
rt = RepoTool(tmp_path.joinpath("repo.git").as_posix())
rt.clone(support_data_path.joinpath("test-repo-submodules").as_uri())
@ -510,3 +532,74 @@ def test_list_submodules_history_progress(support_data_path, caplog, monkeypatch
assert 7 == sum(1 if regex.match(x.message) else 0 for x in caplog.records)
assert len(rt.discovered_submodules_commits) == 22
def test_clone_recursive_failed_repo(tmp_path, monkeypatch, caplog):
    """clone_recursive() fails and logs CRITICAL when the main clone fails."""
    tool = RepoTool(tmp_path.joinpath("r.git").as_posix())
    # force the main clone to report failure without touching git
    monkeypatch.setattr(tool, "clone", lambda x: False)
    caplog.clear()

    succeeded = tool.clone_recursive(tmp_path.as_uri())
    assert not succeeded

    first_record = caplog.records[0]
    assert first_record.levelname == "CRITICAL"
    assert first_record.message == "Clone of main repository failed!"
def test_clone_with_recursive_repos(tmp_path, support_data_path):
    """Recursive clone of submodules-root creates a directory per submodule."""
    root_repo = support_data_path.joinpath("test-submodules-adhoc", "submodules-root")
    tool = RepoTool(tmp_path.joinpath("r.git").as_posix())
    assert tool.clone_recursive(root_repo.as_uri())

    expected_submodules = (
        'test-repo-reduced',
        'test-submodules-adhoc/submodules-level-two',
        'test-repo-different-tags',
        'test-repo-base',
        'test-repo-new-commits',
    )
    assert len(expected_submodules) == len(tool.cloned_submodules_url_list)

    for relative in expected_submodules:
        # clones are stored next to the main repo under a hashed directory name
        source = support_data_path.joinpath(relative).as_posix()
        hashed = gen_repo_hashed_name(source)
        assert tmp_path.joinpath(hashed).is_dir()
def test_clone_with_recursion_persist(tmp_path, support_data_path):
    """Recursive clone with a cache dir writes one cache file per repository."""
    cache_dir = tmp_path.joinpath("___")
    cache_dir.mkdir()
    root_repo = support_data_path.joinpath("test-submodules-adhoc", "submodules-root")
    tool = RepoTool(tmp_path.joinpath("r.git").as_posix())
    assert tool.clone_recursive(root_repo.as_uri(), cache_dir.as_posix())

    # submodule path (relative to the support data) -> expected cache line count
    expected_line_counts = {
        'test-repo-reduced': 327,
        'test-submodules-adhoc/submodules-level-two': 1,
        'test-repo-different-tags': 339,
        'test-repo-base': 339,
        'test-repo-new-commits': 345,
    }
    assert len(expected_line_counts) == len(tool.cloned_submodules_url_list)

    for relative, line_count in expected_line_counts.items():
        source = support_data_path.joinpath(relative).as_posix()
        hashed = gen_repo_hashed_name(source)
        assert tmp_path.joinpath(hashed).is_dir()

        # test cache file
        cache_file = cache_dir.joinpath(hashed)
        assert cache_file.is_file()
        assert len(cache_file.read_text().strip().split("\n")) == line_count

    # root repo cache
    root_cache = cache_dir.joinpath("r.git")
    assert root_cache.is_file()
    assert 2 == len(root_cache.read_text().strip().split("\n"))
def test_clone_with_recursion_fail(tmp_path, support_data_path, caplog):
    """clone_recursive() reports failure when a submodule cannot be cloned.

    Uses the submodule-failed repository. The clone failure of the rewritten
    test-repo-base url must be logged, and both submodule urls must still be
    listed in cloned_submodules_url_list.
    """
    main_repo = support_data_path.joinpath("test-submodules-adhoc", "submodule-failed")
    # url whose clone is expected to fail; built once and reused in both checks
    failing_url = f"{support_data_path.parent.as_posix()}/_support_/test-repo-base"
    failed_cfg_url = f"{support_data_path.as_posix()}/test-submodules-adhoc/submodule-failed-cfg"

    rt = RepoTool(tmp_path.joinpath("r.git").as_posix())
    assert not rt.clone_recursive(main_repo.as_uri())

    assert f"Clone of {failing_url} failed!" in caplog.text
    # plain literal: there is nothing to interpolate here
    assert "Exception: Cmd('git') failed due to: exit code(128)" in caplog.text
    assert set(rt.cloned_submodules_url_list) == {failing_url, failed_cfg_url}