SVN: toys/cvsstats/email2sql.py

pawelz pawelz at pld-linux.org
Sun Feb 28 00:09:32 CET 2010


Author: pawelz
Date: Sun Feb 28 00:09:31 2010
New Revision: 11211

Added:
   toys/cvsstats/email2sql.py   (contents, props changed)
Log:
- initial, not finished


Added: toys/cvsstats/email2sql.py
==============================================================================
--- (empty file)
+++ toys/cvsstats/email2sql.py	Sun Feb 28 00:09:31 2010
@@ -0,0 +1,123 @@
+#!/usr/bin/python
+
+import os
+import sys
+import email
+import mimetypes
+import re
+import time
+
+rmsg = re.compile(  # layout of the message body; the named groups are read by commit.__init__
+  "\AAuthor: (?P<author>\S+)\s+Date: (?P<date>.*)\n"  # \A anchors the match at the very start of the payload
+  "Module: (?P<module>\S+)\s+Tag: (?P<tag>.*)\n"
+  "---- Log message:\n"
+  "(?P<log>(.*\n)*)\n"
+  "---- Files affected:\n"
+  "(?P<files>(.*\n)*)\n"
+  "^---- Diffs:\n"
+  "(?P<diff>(.*\n)*)\n"
+  "^---- CVS-web:\n",
+  re.MULTILINE  # lets the "^" anchors above match at the start of any line
+)
+
+rindex = re.compile("^Index: [^/]+/(?P<file>.*)$")  # "Index:" header line; "file" is the path after its first directory component
+
+class commit:  # reads one email message file and collects per-file diff statistics from its body
+  fdata = {}  # filename -> (hunks, added, removed); NOTE(review): class attribute, never rebound, so all instances share this dict
+  hunks = 0  # total hunks over all files in the diff
+  added = 0  # total "+" lines over all files in the diff
+  removed = 0  # total "-" lines over all files in the diff
+  author = None
+  date = None
+  module = None
+  tag = None
+  log = None
+
+  def __init__(self, msgfile):  # msgfile: path of a file containing one email message
+    fp = open(msgfile)
+    msg = email.message_from_file(fp)
+    fp.close()
+
+    for part in msg.walk():  # use the first MIME part whose payload matches rmsg
+      p = part.get_payload(decode=True)  # NOTE(review): returns None for multipart containers, and rmsg.match(None) raises TypeError
+      m = rmsg.match(p)
+      if m:
+        self.author = m.group("author")
+	d = time.strptime(m.group("date")[0:-4])  # drops the last 4 characters (presumably a timezone suffix - TODO confirm); default format is "%a %b %d %H:%M:%S %Y"
+	self.date = "%i-%i-%i %i:%i:%i" % (d.tm_year, d.tm_mon, d.tm_mday,
+	    d.tm_hour, d.tm_min, d.tm_sec)  # %i means the fields are not zero-padded
+        self.module = m.group("module")
+        self.tag = m.group("tag")
+        self.log = m.group("log")
+        self.files = m.group("files")  # raw text of the "Files affected" section, kept unparsed
+        self.parse_diff(m.group("diff"))
+        break;  # only the first matching part is processed
+
+  def parse_diff(self, diff):  # tallies hunks and added/removed lines per file into self.fdata and the totals
+    current_file=None
+
+    for line in diff.splitlines():
+
+      # is it the beginning of a new file?
+      m = rindex.match(line)
+      if (m):
+	if (current_file):  # flush the counters of the file that just ended
+	  self.fdata[current_file] = (hunks, added, removed)
+	  self.hunks += hunks
+	  self.added += added
+	  self.removed += removed
+
+	current_file = m.group("file")
+	hunks = 0
+	added = 0
+	removed = 0
+	continue
+      
+      if (not current_file):  # ignore everything before the first "Index:" line
+	continue
+
+      try:
+        if line[0] == "@":  # a line starting with "@" is counted as a hunk header
+  	  hunks += 1
+      except IndexError:  # empty line: line[0] does not exist, nothing to count
+	continue
+
+      # don't read any line before the beginning of the first hunk!
+      if hunks > 0:
+	if line[0] == "+":
+          added += 1
+
+        if line[0] == "-":
+	  removed += 1
+    else:  # for/else: the loop contains no break, so this always runs and flushes the last file
+      if (current_file):
+        self.fdata[current_file] = (hunks, added, removed)
+        self.hunks += hunks
+        self.added += added
+        self.removed += removed
+
+  def files_report(self):  # prints one human-readable line per file in self.fdata
+    for f in self.fdata:
+      print "file %s: %s hunks, +%s, -%s" % (f, self.fdata[f][0],
+	  self.fdata[f][1], self.fdata[f][2])
+
+  def commit_report(self):  # prints one human-readable line with the commit-wide totals
+    print "Autor: %s, Tag: %s, Files: %s, Hunks: %s, +%s, -%s" % (self.author,
+	self.tag, str(len(self.fdata)), str(self.hunks), str(self.added),
+	str(self.removed))
+
+  def files_sql(self):  # prints one INSERT INTO files per file; NOTE(review): values are interpolated without SQL escaping
+    for f in self.fdata:
+      print "INSERT INTO files (filename, date, author, tag, hunks, added, removed) VALUES ('%s', '%s', '%s', '%s', %s, %s, %s);" % (f, self.date, self.author, self.tag, self.fdata[f][0], self.fdata[f][1], self.fdata[f][2])
+
+  def commit_sql(self):  # NOTE(review): prints one INSERT INTO commits per file, using that file's counters rather than self.hunks/self.added/self.removed - confirm intended
+    for f in self.fdata:
+      print "INSERT INTO commits (author, date, tag, files, hunks, added, removed) VALUES ('%s', '%s', '%s', %s, %s, %s, %s);" % (self.author, self.date, self.tag, len(self.fdata), self.fdata[f][0], self.fdata[f][1], self.fdata[f][2])
+
+if __name__ == '__main__':  # script entry point: print SQL for every message file given on the command line
+  for f in sys.argv:
+    if f == sys.argv[0]:  # skips the script name; NOTE(review): also skips any later argument equal to it
+      continue
+    c = commit(f)
+    c.files_sql()  # per-file INSERT statements go to stdout
+    c.commit_sql()  # then the INSERT statements for the commits table


More information about the pld-cvs-commit mailing list