admin: ftp/find-duplicates2.py (NEW) find-duplicate.py rewritten to use Poldek's python bindings
jajcus
jajcus at pld-linux.org
Fri Jun 29 13:59:57 CEST 2012
Author: jajcus Date: Fri Jun 29 11:59:57 2012 GMT
Module: admin Tag: HEAD
---- Log message:
find-duplicate.py rewritten to use Poldek's python bindings. A bit faster.
Different name, as it doesn't provide exactly the same functionality (regexp
matching is missing, but probably trivial to add).
---- Files affected:
admin/ftp:
find-duplicates2.py (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: admin/ftp/find-duplicates2.py
diff -u /dev/null admin/ftp/find-duplicates2.py:1.1
--- /dev/null Fri Jun 29 13:59:57 2012
+++ admin/ftp/find-duplicates2.py Fri Jun 29 13:59:52 2012
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+import os, sys, re, getopt, stat
+from collections import defaultdict
+
+import poldek
+
+# Path of the generated removal script when no -o option is given.
+default_out_name = "clean-dir"
+
+# Usage text written to stderr by usage(). {0} is replaced with sys.argv[0]
+# and {1} with default_out_name when the module is loaded.
+help_str = """
+Create a script to remove duplicate RPMS from a directory and report binary
+packages built from different versions of the same source package.
+
+USAGE: {0} [-o out_file] directory
+
+out_file defaults to '{1}'.
+""".format(sys.argv[0], default_out_name)
+
+# Splits an RPM file name of the form <group 1>-<group 2>-<group 3>.<x>.rpm,
+# where groups 2 and 3 contain no '-' and <x> contains neither '-' nor '.'.
+# find_dup_srpms() uses group 1 as the source package name; groups 2 and 3
+# are presumably version and release (not used by the visible code).
+RPM_FILENAME_RE = re.compile(r"^(.*)-([^-]*)-([^-]*)\.[^-\.]*\.rpm$")
+
+def make_list_dict():
+ return defaultdict(list)
+
+def find_dup_srpms(packages):
+ srpms = defaultdict(make_list_dict)
+
+ for pkg in packages.values():
+ source_filename = pkg.srcfilename_s()
+ if not source_filename:
+ continue
+ match = RPM_FILENAME_RE.match(source_filename)
+ if not match:
+ sys.stderr.write("problems with SRPM name: %s\n" % source_filename)
+ continue
+ source_name = match.group(1)
+ srpms[source_name][source_filename].append(pkg)
+
+ for srpm_name, filenames in srpms.items():
+ if len(filenames) > 1:
+ print >> sys.stderr, ("Packages come from different versions"
+ " of the same SRPM:")
+ for srpm_filename, pkgs in filenames.items():
+ print >> sys.stderr, " from {0}:".format(srpm_filename)
+ for pkg in pkgs:
+ print >> sys.stderr, " - {0}".format(pkg.name)
+
+class Callbacks(poldek.callbacks):
+ def log(self, pri, message):
+ print >> sys.stderr, message
+ sys.stderr.flush()
+
+def process(dir_path, out_name):
+ poldek.lib_init()
+ ctx = poldek.poldek_ctx()
+ ctx.set_verbose(True)
+ if not dir_path.endswith("/"):
+ dir_path = dir_path + "/"
+ src = poldek.source(None, "dir", dir_path, None)
+ ctx.configure(ctx.CONF_SOURCE, src)
+ ctx.load_config()
+ ctx.setup()
+
+ cctx = poldek.poclidek_ctx(ctx)
+ cctx.load_packages(cctx.LOAD_AVAILABLE)
+
+ cmd = cctx.rcmd()
+ cmd.execute("ls -q")
+ packages = cmd.packages
+
+ known = {}
+ obsolete = []
+
+ for package in packages:
+ if str(package.pkgdir) == '/var/lib/rpm':
+ continue
+ prev = known.get((package.name, package.arch()))
+ if prev:
+ if package.obsoletes_pkg(prev):
+ #print "newer"
+ obsolete.append(prev)
+ else:
+ #print "older"
+ obsolete.append(package)
+ continue
+ known[package.name, package.arch()] = package
+
+ old_list = sorted(pkg.filename_s() for pkg in obsolete)
+ with open(out_name, "w") as out_file:
+ print >> out_file, "#!/bin/sh\n# autogenerated"
+ for pkg_filename in old_list:
+ print >> out_file, "rm -f", pkg_filename
+ os.chmod(out_name, 0700)
+ sys.stderr.write("saved removal script to %s, %d entries\n" %
+ (out_name, len(old_list)))
+ find_dup_srpms(known)
+
+def usage():
+ sys.stderr.write(help_str)
+ sys.exit(1)
+
+def main():
+ out_name = default_out_name
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "s:o:")
+ except getopt.GetoptError:
+ usage()
+ for o, v in opts:
+ if o == "-o":
+ out_name = v
+ if len(args) != 1:
+ usage()
+ process(args[0], out_name)
+
+main()
+
+# vi: sw=2 sts=2 et ft=python
================================================================
More information about the pld-cvs-commit
mailing list