[projects/git-slug] Automatyczny retry przy przejściowych błędach sieciowych
arekm
arekm at pld-linux.org
Tue May 12 15:09:05 CEST 2026
commit d85f1a2bf8557e06405515e08d6e9c6eae1a3740
Author: Arkadiusz Miśkiewicz <arekm at maven.pl>
Date: Thu Apr 9 23:36:06 2026 +0200
Automatyczny retry przy przejściowych błędach sieciowych
git_passthrough_worker ponawia komendę do 3 razy (liczba prób konfigurowalna
przez PLD.retries) z rosnącym opóźnieniem (5s, 10s, 20s), gdy git
zgłosi znany błąd przejściowy (connection reset, timeout, DNS itp.),
zamiast od razu raportować porażkę.
Komunikaty retry pokazują numer próby: "transient error, retry [1/3] in 5s...",
"retry [1/3] succeeded" oraz "retry [3/3] failed, giving up".
slug.py | 61 +++++++++++++++++++++++++++++++++++++++++------
tests/conftest.py | 1 +
tests/test_passthrough.py | 2 ++
3 files changed, 57 insertions(+), 7 deletions(-)
---
diff --git a/slug.py b/slug.py
index 29c1fcd..60419cf 100755
--- a/slug.py
+++ b/slug.py
@@ -302,6 +302,7 @@ def apply_defaults(options):
CONFIG_KEYS = {
'packagesdir': ('PLD.packagesdir', str),
'jobs': ('PLD.jobs', int),
+ 'retries': ('PLD.retries', int),
}
# Layer 2: fill from git config if CLI didn't set the value
@@ -320,6 +321,7 @@ def apply_defaults(options):
# workers = more parallelism. Capped at 32 to stay within typical
# SSH MaxStartups limits on git servers.
'jobs': lambda: min(cpu_count() * 4, 32),
+ 'retries': lambda: 3,
'quiet': lambda: False,
'pattern': lambda: ['*'],
}
@@ -454,24 +456,69 @@ def build_git_cmd(repo_dir, git_command, git_args, config_pairs):
return cmd
-def git_passthrough_worker(repo_dir, git_command, git_args, config_pairs, quiet):
+_GIT_TRANSIENT_ERRORS = (
+ 'Connection reset by peer',
+ 'Connection timed out',
+ 'Could not resolve hostname',
+ 'Unable to look up',
+ 'The remote end hung up unexpectedly',
+ 'early EOF',
+ 'SSL_read:',
+)
+
+# Seconds to wait between retries (doubles each attempt)
+_GIT_RETRY_DELAY = 5
+
+
+def _is_transient_error(stderr_str):
+ """Check if stderr contains a known transient network error."""
+ return any(msg in stderr_str for msg in _GIT_TRANSIENT_ERRORS)
+
+
+def git_passthrough_worker(repo_dir, git_command, git_args, config_pairs, quiet, retries):
"""Run a git command in one repo, capture and prefix output.
Called by the worker pool for each repo in parallel.
Returns repo_dir on failure (so the caller can count failures), None on success.
+ Retries up to 'retries' times on transient network errors
+ (connection reset, timeout, etc.) with exponential backoff.
+
Output policy:
- Failed repos: always print everything (stdout + stderr) regardless of -q.
- Successful repos, stderr: always print (may contain warnings).
- Successful repos, stdout: print unless -q (quiet suppresses data output).
"""
+ import time
directory = os.path.basename(repo_dir)
cmd = build_git_cmd(repo_dir, git_command, git_args, config_pairs)
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- preexec_fn=_reset_sigint)
- stdout_bytes, stderr_bytes = proc.communicate()
- stdout_str = stdout_bytes.decode('utf-8', errors='replace')
- stderr_str = stderr_bytes.decode('utf-8', errors='replace')
+
+ delay = _GIT_RETRY_DELAY
+ for attempt in range(retries + 1):
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ preexec_fn=_reset_sigint)
+ stdout_bytes, stderr_bytes = proc.communicate()
+ stdout_str = stdout_bytes.decode('utf-8', errors='replace')
+ stderr_str = stderr_bytes.decode('utf-8', errors='replace')
+
+ if proc.returncode == 0 or not _is_transient_error(stderr_str):
+ if attempt > 0 and proc.returncode == 0:
+ print_prefixed('retry [{}/{}] succeeded'.format(
+ attempt, retries),
+ directory, sys.stderr)
+ break
+
+ # Transient error — retry after backoff
+ if attempt < retries:
+ print_prefixed('transient error, retry [{}/{}] in {}s...'.format(
+ attempt + 1, retries, delay),
+ directory, sys.stderr)
+ time.sleep(delay)
+ delay *= 2
+ else:
+ print_prefixed('retry [{}/{}] failed, giving up'.format(
+ attempt, retries),
+ directory, sys.stderr)
# Always show stderr (may contain warnings even on success).
print_prefixed(stderr_str, directory, sys.stderr)
@@ -506,7 +553,7 @@ def passthrough_command(options, git_command, git_args):
file=sys.stderr)
# Build argument tuples for pool.starmap()
- args = [(r, git_command, git_args, config_pairs, options.quiet)
+ args = [(r, git_command, git_args, config_pairs, options.quiet, options.retries)
for r in repos]
failed = run_worker(git_passthrough_worker, options, args)
diff --git a/tests/conftest.py b/tests/conftest.py
index c9545e3..d461f43 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,6 +10,7 @@ def make_options():
"packagesdir": "/pkgs",
"jobs": 2,
"quiet": False,
+ "retries": 3,
"pattern": ["*"],
"had_errors": False,
}
diff --git a/tests/test_passthrough.py b/tests/test_passthrough.py
index 658e8e3..597b8bd 100644
--- a/tests/test_passthrough.py
+++ b/tests/test_passthrough.py
@@ -48,6 +48,7 @@ def test_git_passthrough_worker_prints_all_output_for_failures(monkeypatch, caps
[],
["pull.rebase=true"],
quiet=True,
+ retries=0,
)
captured = capsys.readouterr()
@@ -69,6 +70,7 @@ def test_git_passthrough_worker_quiet_suppresses_success_stdout_only(monkeypatch
[],
[],
quiet=True,
+ retries=0,
)
captured = capsys.readouterr()
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/projects/git-slug.git/commitdiff/4a7e426b8f1a3571094b5dc89412bc49b8f29666
More information about the pld-cvs-commit
mailing list