New tests for cloner.clone_recursive

Signed-off-by: Václav Valíček <valicek1994@gmail.com>
This commit is contained in:
2022-07-30 20:29:59 +02:00
parent 85355a4a1e
commit ae34a5e95f
4 changed files with 209 additions and 17 deletions

View File

@@ -2,12 +2,13 @@ import os.path
import git
from git import Repo
from git.exc import NoSuchPathError, InvalidGitRepositoryError
from git.exc import NoSuchPathError, InvalidGitRepositoryError, GitCommandError
from git import RemoteProgress
import logging
import time
from typing import Union, Optional
from repo_cloner.lib.checksum import gen_repo_hashed_name
from io import BytesIO
log = logging.getLogger("rc.repo")
@@ -140,6 +141,12 @@ class RepoTool:
def discovered_submodules_commits(self) -> list:
    # Accessor for the running history of commit hashes already scanned for
    # submodules (appended to in list_submodules; persisted by
    # _persist_submodule_commits). Returns the live list, not a copy.
    # NOTE(review): presumably decorated with @property at the call sites —
    # the decorator line is outside this diff hunk; confirm in the full file.
    return self._submodule_discovery_history
def _persist_submodule_commits(self, path: str):
    """Persist the discovered submodule commit hashes to *path*.

    Each entry of ``self.discovered_submodules_commits`` is written on its
    own line; any existing file content at *path* is overwritten.
    """
    with open(path, "w") as out:
        out.writelines(f"{sha}\n" for sha in self.discovered_submodules_commits)
def __check_initialized(self):
def inner(*args):
fake_self: RepoTool = args[0]
@@ -156,13 +163,20 @@ class RepoTool:
return False
log.info(f"Cloning repository from url: {url}")
self._repo = Repo.clone_from(
url,
to_path = self._path,
progress = GitRemoteProgress(),
bare = True,
mirror = True
)
try:
self._repo = Repo.clone_from(
url,
to_path = self._path,
progress = GitRemoteProgress(),
bare = True,
mirror = True
)
except GitCommandError as e:
log.critical(f"Clone of {url} failed!")
log.critical(f"Exception: {e}")
self._initialized = False
return False
self._initialized = True
self._bare = self._repo.bare
@@ -207,14 +221,16 @@ class RepoTool:
if '.gitmodules' in commit.tree:
git_modules: git.Blob = commit.tree['.gitmodules']
from io import BytesIO
x = BytesIO(git_modules.data_stream.read())
x.name = git_modules.name
parser = git.GitConfigParser(x, read_only = True)
for section in parser.sections():
if parser.has_option(section, "url"):
submodules.append(parser.get_value(section, "url"))
try:
parser = git.GitConfigParser(x, read_only = True)
for section in parser.sections():
if parser.has_option(section, "url"):
submodules.append(parser.get_value(section, "url"))
except BaseException:
return False
if commit.hexsha not in self._submodule_discovery_history:
self._submodule_discovery_history.append(commit.hexsha)
@@ -233,7 +249,8 @@ class RepoTool:
for commit in iterator:
counter += 1
commit_submodules = self.list_submodules(commit)
submodules.update(commit_submodules)
if not type(commit_submodules) == bool:
submodules.update(commit_submodules)
if time.time() >= last_status + status_offset:
log.info(f"Submodule discovery: {counter} commits finished, {len(submodules)} discovered")
last_status = time.time()
@@ -242,7 +259,7 @@ class RepoTool:
def clone_recursive(
self,
main_url: str,
scan_cache_dir: Optional[str] == None,
scan_cache_dir: Optional[str] = None,
scan_depth: Optional[int] = None
) -> bool:
@@ -286,7 +303,24 @@ class RepoTool:
everything_succeed = False
continue
# persistor
if scan_cache_dir:
cache_file = os.path.join(scan_cache_dir, gen_repo_hashed_name(url))
log.debug(
f"Saving {len(submodule_cloner.discovered_submodules_commits)} commits into {cache_file}")
submodule_cloner._persist_submodule_commits(cache_file)
for submodule in submodules:
self._recursive_discovery_urls.add(submodule)
if scan_cache_dir:
# persist main repo commits
cache_file = os.path.basename(self.path)
cache_file = os.path.join(scan_cache_dir, cache_file)
self._persist_submodule_commits(cache_file)
# persist discovered submodule urls
with open(os.path.join(scan_cache_dir, "submodules.cache"), "w") as f:
f.write("\n".join(self.cloned_submodules_url_list))
return everything_succeed