admin: ftp/find-duplicates2.py (NEW) find-duplicate.py rewritten to use Pol...

jajcus jajcus at pld-linux.org
Fri Jun 29 13:59:57 CEST 2012


Author: jajcus                       Date: Fri Jun 29 11:59:57 2012 GMT
Module: admin                         Tag: HEAD
---- Log message:
find-duplicate.py rewritten to use Poldek's Python bindings; it is a bit faster.
The script has a different name because it doesn't provide exactly the same
functionality (regexp matching is missing, but is probably trivial to add).

---- Files affected:
admin/ftp:
   find-duplicates2.py (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: admin/ftp/find-duplicates2.py
diff -u /dev/null admin/ftp/find-duplicates2.py:1.1
--- /dev/null	Fri Jun 29 13:59:57 2012
+++ admin/ftp/find-duplicates2.py	Fri Jun 29 13:59:52 2012
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+import os, sys, re, getopt, stat
+from collections import defaultdict
+
+import poldek
+
+default_out_name = "clean-dir"  # output script name used when -o is not given
+
+help_str = """
+Create a script to remove duplicate RPMS from a directory and report binary
+packages built from different versions of the same source package.
+
+USAGE: {0} [-o out_file] directory
+
+out_file defaults to '{1}'.
+""".format(sys.argv[0], default_out_name)  # {0} = sys.argv[0], {1} = default out_file
+
+RPM_FILENAME_RE = re.compile(r"^(.*)-([^-]*)-([^-]*)\.[^-\.]*\.rpm$")  # groups: name, version, release
+
+def make_list_dict():  # default factory for find_dup_srpms: a fresh defaultdict(list)
+    return defaultdict(list)
+
+def find_dup_srpms(packages):  # report on stderr packages built from several versions of one SRPM
+    srpms = defaultdict(make_list_dict)  # source name -> {SRPM file name -> [packages]}
+
+    for pkg in packages.values():  # packages is a mapping; only its values are used
+        source_filename = pkg.srcfilename_s()
+        if not source_filename:  # no source file name available: skip silently
+            continue
+        match = RPM_FILENAME_RE.match(source_filename)
+        if not match:  # name does not fit the expected pattern: warn and skip
+            sys.stderr.write("problems with SRPM name: %s\n" % source_filename)
+            continue
+        source_name = match.group(1)  # everything before the last two "-" separated fields
+        srpms[source_name][source_filename].append(pkg)
+
+    for srpm_name, filenames in srpms.items():  # NOTE(review): srpm_name is not used below
+        if len(filenames) > 1:  # more than one distinct SRPM file for this source name
+            print >> sys.stderr, ("Packages come from different versions"
+                                                        " of the same SRPM:")
+            for srpm_filename, pkgs in filenames.items():
+                print >> sys.stderr, "  from {0}:".format(srpm_filename)
+                for pkg in pkgs:
+                    print >> sys.stderr, "    - {0}".format(pkg.name)
+
+class Callbacks(poldek.callbacks):  # logs to stderr; NOTE(review): never instantiated in this file
+    def log(self, pri, message):  # pri is accepted but ignored; only message is printed
+        print >> sys.stderr, message
+        sys.stderr.flush()  # flush after every message
+
+def process(dir_path, out_name):  # scan dir_path via poldek and write a removal script to out_name
+    poldek.lib_init()
+    ctx = poldek.poldek_ctx()
+    ctx.set_verbose(True)
+    if not dir_path.endswith("/"):  # the path is passed to poldek with a trailing slash
+        dir_path = dir_path + "/"
+    src = poldek.source(None, "dir", dir_path, None)  # package source of type "dir" at dir_path
+    ctx.configure(ctx.CONF_SOURCE, src)
+    ctx.load_config()
+    ctx.setup()
+
+    cctx = poldek.poclidek_ctx(ctx)
+    cctx.load_packages(cctx.LOAD_AVAILABLE)
+
+    cmd = cctx.rcmd()
+    cmd.execute("ls -q")  # the package list is read back from cmd.packages
+    packages = cmd.packages
+
+    known = {}  # (name, arch) -> package currently kept
+    obsolete = []  # packages that lost to another one with the same (name, arch)
+
+    for package in packages:
+        if str(package.pkgdir) == '/var/lib/rpm':  # presumably installed packages; not part of dir_path
+            continue
+        prev = known.get((package.name, package.arch()))
+        if prev:
+            if package.obsoletes_pkg(prev):  # this package replaces the one kept so far
+                #print "newer"
+                obsolete.append(prev)
+            else:  # the kept package stays; this one goes to the removal list
+                #print "older"
+                obsolete.append(package)
+                continue
+        known[package.name, package.arch()] = package
+       
+    old_list = sorted(pkg.filename_s() for pkg in obsolete)
+    with open(out_name, "w") as out_file:
+        print >> out_file, "#!/bin/sh\n# autogenerated"
+        for pkg_filename in old_list:
+            print >> out_file, "rm -f", pkg_filename  # NOTE(review): file name is written unquoted
+    os.chmod(out_name, 0700)  # rwx for the owner only
+    sys.stderr.write("saved removal script to %s, %d entries\n" % 
+    		            (out_name, len(old_list)))
+    find_dup_srpms(known)  # the SRPM check sees only the packages that were kept
+
+def usage():  # write help_str to stderr and exit with status 1
+    sys.stderr.write(help_str)
+    sys.exit(1)
+
+def main():  # parse the command line and run process() on the single directory argument
+    out_name = default_out_name
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "s:o:")  # NOTE(review): -s is accepted but ignored
+    except getopt.GetoptError:
+        usage()  # usage() exits, so opts and args are always bound below
+    for o, v in opts:
+        if o == "-o":
+            out_name = v
+    if len(args) != 1:  # exactly one directory argument is required
+        usage()
+    process(args[0], out_name)
+
+main()  # NOTE(review): no __name__ == "__main__" guard, so this also runs on import
+
+# vi: sw=2 sts=2 et ft=python
================================================================


More information about the pld-cvs-commit mailing list