Repo cloner: list submodules

Signed-off-by: Václav Valíček <valicek1994@gmail.com>
This commit is contained in:
Václav Valíček 2022-07-29 04:22:05 +02:00
parent d0c808ab0f
commit 8a150c63c5
Signed by: valicek
GPG Key ID: FF05BDCA0C73BB31
2 changed files with 94 additions and 0 deletions

View File

@ -1,3 +1,4 @@
import git
from git import Repo from git import Repo
from git.exc import NoSuchPathError, InvalidGitRepositoryError from git.exc import NoSuchPathError, InvalidGitRepositoryError
from git import RemoteProgress from git import RemoteProgress
@ -181,3 +182,37 @@ class RepoTool:
x = hashlib.sha256(cumulative).hexdigest() x = hashlib.sha256(cumulative).hexdigest()
log.debug(f"Repo fingerprint is {x}") log.debug(f"Repo fingerprint is {x}")
return x return x
def list_submodules(self, commit: str = "HEAD") -> list:
    """Return the submodule URLs declared in ``.gitmodules`` of a commit.

    The commit is resolved through ``self._repo.commit``. When its tree has
    no ``.gitmodules`` entry, an empty list is returned. Otherwise the blob
    is parsed as a git config file and the ``url`` option of every section
    that has one is collected, in section order.
    """
    tree = self._repo.commit(commit).tree
    if '.gitmodules' not in tree:
        return []

    from io import BytesIO

    blob: git.Blob = tree['.gitmodules']
    # The blob content is wrapped in an in-memory file object that also
    # carries the blob's name before being handed to the config parser.
    buffer = BytesIO(blob.data_stream.read())
    buffer.name = blob.name
    config = git.GitConfigParser(buffer, read_only=True)
    return [
        config.get_value(section_name, "url")
        for section_name in config.sections()
        if config.has_option(section_name, "url")
    ]
def list_submodules_history(self, limit_of_commits: Optional[int] = None):
    """Collect the distinct submodule URLs seen across the commit history.

    Commits are iterated with ``all=True`` and ``max_count`` set to
    ``limit_of_commits``. Each commit is passed to ``list_submodules`` and
    the resulting URLs are merged into a set. A progress line is logged
    once at least 0.5 seconds have elapsed since the previous report.

    Returns a list of the unique URLs; it is built from a set, so the order
    is not defined.
    """
    log.info(f"Listing repository submodule history")
    commits = self._repo.iter_commits(all=True, max_count=limit_of_commits)
    discovered = set()
    report_interval = 0.5
    last_report = time.time()
    for processed, commit in enumerate(commits, start=1):
        discovered.update(self.list_submodules(commit))
        if time.time() < last_report + report_interval:
            continue
        log.info(f"Submodule discovery: {processed} commits finished, {len(discovered)} discovered")
        # The clock is read again after logging, so the next interval is
        # measured from the end of this report.
        last_report = time.time()
    return list(discovered)

View File

@ -419,3 +419,62 @@ def test_fingerprint(support_data_path: Path, repo: str, hash):
assert not rt.initialized assert not rt.initialized
assert rt.repo_fingerprint() == hash assert rt.repo_fingerprint() == hash
def test_list_submodules_no_submodules(cloned_base_repo_obj):
    """The base repository fixture must report an empty submodule list."""
    found = cloned_base_repo_obj.list_submodules()
    assert found == []
def test_list_submodules_ok(tmp_path, support_data_path):
    """Check submodule listing for the default ref, HEAD and a fixed commit."""
    base_url = 'https://git.sw3.cz/kamikaze/test-repo-base.git'
    reduced_url = 'https://git.sw3.cz/kamikaze/test-repo-reduced.git'

    target = tmp_path.joinpath("repo.git").as_posix()
    source = support_data_path.joinpath("test-repo-submodules").as_uri()
    rt = RepoTool(target)
    rt.clone(source)

    # The default argument and an explicit "HEAD" must give the same result.
    assert rt.list_submodules() == [base_url]
    assert rt.list_submodules("HEAD") == [base_url]
    # This commit is expected to declare two submodules.
    older = rt.list_submodules("cc58d514348d0d2c8f0b75ad1f7ff96eb02781d5")
    assert older == [base_url, reduced_url]
def test_list_submodules_history(tmp_path, support_data_path):
    """Check history-wide submodule discovery with and without commit limits."""
    base_url = 'https://git.sw3.cz/kamikaze/test-repo-base.git'
    different_tags_url = 'https://git.sw3.cz/kamikaze/test-repo-different-tags.git'
    reduced_url = 'https://git.sw3.cz/kamikaze/test-repo-reduced.git'
    everything = [base_url, different_tags_url, reduced_url]

    rt = RepoTool(tmp_path.joinpath("repo.git").as_posix())
    rt.clone(support_data_path.joinpath("test-repo-submodules").as_uri())

    # The result order is not defined, so sort before comparing.
    unlimited = rt.list_submodules_history()
    unlimited.sort()
    assert unlimited == everything

    assert rt.list_submodules_history(100) == []
    assert rt.list_submodules_history(120) == [different_tags_url]

    limited = rt.list_submodules_history(320)
    limited.sort()
    assert limited == everything
def test_list_submodules_history_progress(support_data_path, caplog, monkeypatch):
    """Check how many progress messages are logged under a fake clock."""
    import re
    import time

    # One-element list used as a mutable cell; every call to the fake
    # clock advances it by 0.2.
    clock = [1659059078]

    def fake_time() -> float:
        clock[0] += 0.2
        return clock[0]

    repo_path = support_data_path.joinpath("test-repo-submodules-long").as_posix()
    rt = RepoTool(repo_path)
    caplog.set_level(logging.INFO)
    caplog.clear()
    with monkeypatch.context() as m:
        m.setattr(time, "time", fake_time)
        rt.list_submodules_history(22)

    records = caplog.records
    assert all(record.levelname == "INFO" for record in records)
    progress_pattern = re.compile("Submodule discovery: \\d+ commits finished, 1 discovered")
    assert 8 == len(records)
    progress_records = [record for record in records if progress_pattern.match(record.message)]
    assert 7 == len(progress_records)