diff --git a/repo_cloner/lib/__init__.py b/repo_cloner/lib/__init__.py index e69de29..0007d3b 100644 --- a/repo_cloner/lib/__init__.py +++ b/repo_cloner/lib/__init__.py @@ -0,0 +1,9 @@ +from .checksum import gen_repo_hashed_name +from .cloner_config import ClonerConfig, ClonerConfigParser +from .config_file_not_found_error import ConfigFileNotFoundError +from .default_cloner_config import DefaultClonerConfig +from .dir_not_found_error import DirNotFoundError +from .disk_stored_list import DiskStoredList +from .repo_tool import RepoTool +from .repo_dir_structure import RepoDirStructure +from .cloner import Cloner diff --git a/repo_cloner/lib/cloner.py b/repo_cloner/lib/cloner.py index 4f27afa..f27545f 100644 --- a/repo_cloner/lib/cloner.py +++ b/repo_cloner/lib/cloner.py @@ -1,8 +1,7 @@ -from repo_cloner.lib.cloner_config import ClonerConfig -from repo_cloner.lib.repo_dir_structure import RepoDirStructure -from repo_cloner.lib.dir_not_found_error import DirNotFoundError -from repo_cloner.lib.repo_tool import RepoTool -from repo_cloner.lib.checksum import gen_repo_hashed_name +from repo_cloner.lib import gen_repo_hashed_name +from repo_cloner.lib import DirNotFoundError +from repo_cloner.lib import ClonerConfig, DiskStoredList, RepoDirStructure, RepoTool + from pathlib import Path from typing import Optional from time import time @@ -40,7 +39,6 @@ class Cloner: log.info("Submodule cache dir does not exist! 
-> creating") Path(self.__submodule_cache).mkdir(parents = True) - def check_interval(self): log.debug(f"Checking interval for {self._config.cloner_project_name}") # get interval @@ -82,12 +80,97 @@ class Cloner: def __main_repo_path(self) -> str: return self._repo_path_by_url(self._config.cloner_repo_url) + @classmethod + def check_submodules_repo(cls, repo_tool: RepoTool, cache_file: str, submodule_list: DiskStoredList, + scan_depth: Optional[int]): + base = os.path.basename(repo_tool.path) + log.debug(f"Loading submodule cache for repo {base}") + repo_commits = DiskStoredList(cache_file) + log.debug(f"Loaded {len(repo_commits)} commits") + + # list fetched repo + log.debug(f"Scanning repo {base} for new submodules") + new_commits = repo_tool.list_commits(scan_depth) + + # discover new submodules in new commits + for commit in new_commits: + log.debug(f"Discovering submodules in {commit.hexsha}") + if commit.hexsha in repo_commits: + log.debug(f"Cached commit... Okay") + continue + discovered = repo_tool.list_submodules(commit) + if discovered: + for submodule in discovered: + if submodule not in submodule_list: + log.warning(f"Found new submodule: {submodule}") + submodule_list.append(submodule) + repo_commits.append(commit.hexsha) + return submodule_list + def sync(self) -> bool: + if not self.__opened: self._repo = RepoTool(self.__main_repo_path) if not self._repo.initialized: return False - return self._repo.fetch() + # determine recursive behavior + if not self._config.cloner_submodules: + return self._repo.fetch() + + # recursive now + if not self._repo.fetch(): + log.critical(f"Repo fetch failed for {self._config.cloner_project_name}") + return False + + log.debug(f"Loading submodules.cache") + submodules = DiskStoredList(os.path.join(self.__submodule_cache, "submodules.cache")) + log.debug(f"Loaded submodules.cache - {len(submodules)} items") + path = gen_repo_hashed_name(self._config.cloner_repo_url) + log.debug(f"Main repo hashed name to load: {path}") 
+ + # recursion limit? + scan_depth = self._config.cloner_submodule_depth + log.debug(f"Scan depth is {scan_depth} commits") + if scan_depth == 0: + log.debug(f"Repository scan depth is not limited! -> setting scan_depth to none") + scan_depth = None + + submodules = Cloner.check_submodules_repo( + self._repo, os.path.join(self.__submodule_cache, path), submodules, scan_depth) + + everything_succeed: bool = True + everything_checked: bool = False + fetched_repos = set() + while not everything_checked: + # recursively scan and clone repositories + everything_checked = True + # for every url in list + # list() is needed - Runtime Error for set() changed during iteration + for url in list(submodules): + if url not in fetched_repos: + everything_checked = False + # generate new path + directory = os.path.dirname(self.__main_repo_path) + submodule_cloner = RepoTool(os.path.join(directory, gen_repo_hashed_name(url))) + # clone or checkout? + if not submodule_cloner.initialized: + log.info(f"New uninitialized submodule found: {url}. 
Cloning...") + checked: bool = submodule_cloner.clone(url) + else: + checked: bool = submodule_cloner.fetch() + + # mark cloned even if failed afterwards - while loop stuck solution + fetched_repos.add(url) + if not checked: + log.critical(f"Clone/fetch of submodule: {url} failed") + everything_succeed = False + continue + + submodules = Cloner.check_submodules_repo( + submodule_cloner, + os.path.join(self.__submodule_cache, gen_repo_hashed_name(url)), + submodules, scan_depth) + return everything_succeed def perform_check(self): log.info(f"Started check for {self._config.cloner_project_name}, url: {self._config.cloner_repo_url}") diff --git a/repo_cloner/lib/repo_tool.py b/repo_cloner/lib/repo_tool.py index 211856c..c0e3bea 100644 --- a/repo_cloner/lib/repo_tool.py +++ b/repo_cloner/lib/repo_tool.py @@ -184,7 +184,7 @@ class RepoTool: @__check_initialized def fetch(self) -> bool: - log.info("Fetching repo state") + log.info("Fetching repo state") if not len(self._repo.remotes): log.warning(f"Repo: {self._path} does not contain any remotes!") return False @@ -214,6 +214,10 @@ class RepoTool: log.debug(f"Repo fingerprint is {x}") return x + @__check_initialized + def list_commits(self, max_depth: Optional[int] = None): + return self._repo.iter_commits(all = True, max_count = max_depth) + @__check_initialized def list_submodules(self, commit: str = "HEAD") -> Union[list, bool]: commit = self._repo.commit(commit) @@ -241,7 +245,7 @@ class RepoTool: def list_submodules_history(self, limit_of_commits: Optional[int] = None) -> Union[list, bool]: log.info(f"Listing repository submodule history") - iterator = self._repo.iter_commits(all = True, max_count = limit_of_commits) + iterator = self.list_commits(limit_of_commits) submodules = set() counter: int = 0 last_status = time.time() diff --git a/tests/lib/test_cloner.py b/tests/lib/test_cloner.py index 79277b5..a08e9ad 100644 --- a/tests/lib/test_cloner.py +++ b/tests/lib/test_cloner.py @@ -1,12 +1,10 @@ import git import 
pytest -from repo_cloner.lib.checksum import gen_repo_hashed_name -import repo_cloner.lib.dir_not_found_error +from repo_cloner.lib import gen_repo_hashed_name, DirNotFoundError, Cloner from cloner_test_fixtures import * -from repo_cloner.lib.cloner import Cloner from pathlib import Path import logging -import unittest.mock +from unittest.mock import patch, PropertyMock def mock_time() -> float: @@ -35,7 +33,7 @@ class MockDirStruct: @property def cache_dir_exists(self): if self.raise_cache_exists: - raise repo_cloner.lib.dir_not_found_error.DirNotFoundError("mock_dir") + raise DirNotFoundError("mock_dir") return True @@ -266,7 +264,6 @@ def test_clone_initialized(tmp_path, path_repo_base, caplog): def test_clone_recursive(tmp_path, path_repo_base, caplog): - from unittest.mock import patch, PropertyMock mock = MockDirStruct(tmp_path) mock.config.cloner_repo_url = "https://repo" mock.config.cloner_submodules = True diff --git a/tests/lib/test_repo_tool.py b/tests/lib/test_repo_tool.py index 18d0a44..42a14c7 100644 --- a/tests/lib/test_repo_tool.py +++ b/tests/lib/test_repo_tool.py @@ -458,6 +458,36 @@ def test_fingerprint(support_data_path: Path, repo: str, hash): assert rt.repo_fingerprint() == hash +def test_list_commits_unopened(tmp_path): + rt = RepoTool(tmp_path) + assert not rt.list_commits() + + +def test_list_commits(cloned_base_repo_obj): + assert cloned_base_repo_obj.list_commits(3) + count = 0 + commits = set() + for commit in cloned_base_repo_obj.list_commits(20): + count += 1 + commits.add(commit.hexsha) + assert count == 20 + + assert commits == { + '93e97409040f6e4ba5cebc4ddca679a2d3203efa', 'd8cd3257e05dc4a3d0061b5320ef5efd0b45d840', + 'f916de4ff4c65b93250716354917488ea480f592', 'd6b1a8d59185437ea2fd0055700b4656707ba2d2', + '10c0963c6ded9e41bcb82cf524661d00383d07b4', '06bdaca2c1bbb0b599fdace8698214f82c5850d6', + '95d31440dbcaf38e596e6b712d0d288215ce2a56', '86dab14dfdb87738fe7ff3613222dffb6dd2f796', + '558f12143c6d6d60690acb291cf64a863e12c2d0', 
'30c00bf3850ec01341c7961a251dee2c4d4cb771', + '7bc1195651dd8bbc62bcc52edf6d07923b2d4f1a', '2cd8137eeb96e2d23169d5728e9071b0ed663677', + '786c4412069526051cbbce6a87e76a63aa64f1e7', '143637c4fc05837d238c0ca67543a4494c07cc85', + 'fa0c7b9ac17a342b51fb5414317b54a5dfd2074b', '77f77c3e8bca3f5b43b6f55a88d619b30f6a6ae0', + 'b3615798ace53b983251a57593b328e707a8f419', 'e0c7e2a72579e24657c05e875201011d2b48bf94', + 'c090c23662933aa5fbbc6385bfae17e316e56a84', '4f5e3068cce0bc562a1ac7719915a86a6d01283e' + } + + assert 339 == sum(1 for commit in cloned_base_repo_obj.list_commits()) + + def test_list_submodules_no_submodules(cloned_base_repo_obj): assert cloned_base_repo_obj.list_submodules() == [] assert cloned_base_repo_obj.discovered_submodules_commits == ["e0c7e2a72579e24657c05e875201011d2b48bf94"]