SVN: toys/cvsstats/email2sql.py
pawelz
pawelz at pld-linux.org
Sun Feb 28 00:09:32 CET 2010
Author: pawelz
Date: Sun Feb 28 00:09:31 2010
New Revision: 11211
Added:
toys/cvsstats/email2sql.py (contents, props changed)
Log:
- initial, not finished
Added: toys/cvsstats/email2sql.py
==============================================================================
--- (empty file)
+++ toys/cvsstats/email2sql.py Sun Feb 28 00:09:31 2010
@@ -0,0 +1,123 @@
+#!/usr/bin/python
+
+import os
+import sys
+import email
+import mimetypes
+import re
+import time
+
# Pattern for the plain-text body of a CVS commit notification.  It is anchored
# at the very start of the text (\A) and expects, in this order: the
# "Author/Date" line, the "Module/Tag" line, then the "Log message", "Files
# affected", "Diffs" and "CVS-web" sections.  Each section body is captured as
# a run of whole lines and must be followed by one empty line.
#
# Raw strings are used so that \A, \S and \s reach the regex engine verbatim
# instead of relying on Python leaving unknown string escapes untouched.
rmsg = re.compile(
    r"\AAuthor: (?P<author>\S+)\s+Date: (?P<date>.*)\n"
    r"Module: (?P<module>\S+)\s+Tag: (?P<tag>.*)\n"
    r"---- Log message:\n"
    r"(?P<log>(.*\n)*)\n"
    r"---- Files affected:\n"
    r"(?P<files>(.*\n)*)\n"
    r"^---- Diffs:\n"
    r"(?P<diff>(.*\n)*)\n"
    r"^---- CVS-web:\n",
    re.MULTILINE
)

# Pattern for the "Index: <first-component>/<path>" line that opens the diff of
# one file; the "file" group is the path with its first component removed.
rindex = re.compile(r"^Index: [^/]+/(?P<file>.*)$")
+
class commit:
    """Statistics of one CVS commit, read from a saved commit e-mail.

    The constructor parses the message file with the module-level ``rmsg``
    pattern and, when a part matches, fills in the attributes below and
    counts hunks and added/removed lines per file.  When no part matches,
    the attributes keep their defaults and ``fdata`` stays empty.
    """

    # Defaults for a message that could not be parsed.  ``fdata`` is replaced
    # by a fresh dict in __init__: a dict literal here would be shared by all
    # instances, so files of earlier commits would leak into later ones.
    fdata = None    # filename -> (hunks, added, removed)
    hunks = 0       # totals over all files of the commit
    added = 0
    removed = 0
    author = None
    date = None     # "Y-M-D h:m:s", fields not zero-padded
    module = None
    tag = None
    log = None
    files = None    # raw text of the "Files affected" section

    def __init__(self, msgfile):
        """Parse the e-mail stored at path *msgfile*.

        Only the first message part matching ``rmsg`` is used.  Raises
        whatever ``open`` raises for an unreadable path, and ``ValueError``
        from ``time.strptime`` when the date is not in its default format.
        """
        self.fdata = {}

        with open(msgfile) as fp:
            msg = email.message_from_file(fp)

        for part in msg.walk():
            payload = part.get_payload(decode=True)
            if payload is None:
                # Multipart containers carry no payload of their own.
                continue
            if not isinstance(payload, str):
                # Python 3 hands back bytes; the pattern works on text.
                charset = part.get_content_charset() or "utf-8"
                try:
                    payload = payload.decode(charset, "replace")
                except LookupError:
                    # Unknown charset name in the message header.
                    payload = payload.decode("utf-8", "replace")

            m = rmsg.match(payload)
            if not m:
                continue

            self.author = m.group("author")
            # The last four characters are dropped before parsing --
            # presumably a " XXX" timezone suffix that the default
            # strptime format ("%a %b %d %H:%M:%S %Y") does not accept.
            d = time.strptime(m.group("date")[0:-4])
            self.date = "%i-%i-%i %i:%i:%i" % (d.tm_year, d.tm_mon, d.tm_mday,
                                               d.tm_hour, d.tm_min, d.tm_sec)
            self.module = m.group("module")
            self.tag = m.group("tag")
            self.log = m.group("log")
            self.files = m.group("files")
            self.parse_diff(m.group("diff"))
            break

    def _record(self, filename, hunks, added, removed):
        """Store the counters of one file and add them to the commit totals."""
        self.fdata[filename] = (hunks, added, removed)
        self.hunks += hunks
        self.added += added
        self.removed += removed

    def parse_diff(self, diff):
        """Count hunks and added/removed lines for every file in *diff*.

        *diff* is the text of the "Diffs" section.  A line matching the
        module-level ``rindex`` pattern starts a new file; results go to
        ``self.fdata`` and are added to ``self.hunks``, ``self.added`` and
        ``self.removed``.
        """
        current_file = None
        hunks = added = removed = 0

        for line in diff.splitlines():
            # Is it the beginning of a new file?
            m = rindex.match(line)
            if m:
                if current_file:
                    self._record(current_file, hunks, added, removed)
                current_file = m.group("file")
                hunks = added = removed = 0
                continue

            # Ignore everything before the first "Index:" line, and empty
            # lines, which have no marker character to inspect.
            if not current_file or not line:
                continue

            marker = line[0]
            if marker == "@":
                hunks += 1
            elif hunks > 0:
                # Lines before the first hunk are skipped on purpose: the
                # "---" / "+++" file headers are not changes.
                if marker == "+":
                    added += 1
                elif marker == "-":
                    removed += 1

        # The last file has no following "Index:" line to flush it.
        if current_file:
            self._record(current_file, hunks, added, removed)

    @staticmethod
    def _sql_quote(value):
        """Return *value* as a single-quoted SQL literal.

        Embedded single quotes are doubled; without that a quote in a file
        name, tag or author would end the literal early.
        """
        return "'%s'" % str(value).replace("'", "''")

    def files_report(self):
        """Print one human-readable line per file."""
        for name, (hunks, added, removed) in self.fdata.items():
            print("file %s: %s hunks, +%s, -%s" % (name, hunks, added, removed))

    def commit_report(self):
        """Print one human-readable summary line for the whole commit."""
        print("Autor: %s, Tag: %s, Files: %s, Hunks: %s, +%s, -%s" % (
            self.author, self.tag, str(len(self.fdata)), str(self.hunks),
            str(self.added), str(self.removed)))

    def files_sql(self):
        """Print one INSERT statement for the ``files`` table per file."""
        quote = self._sql_quote
        for name, (hunks, added, removed) in self.fdata.items():
            print(
                "INSERT INTO files (filename, date, author, tag, hunks, "
                "added, removed) VALUES (%s, %s, %s, %s, %s, %s, %s);"
                % (quote(name), quote(self.date), quote(self.author),
                   quote(self.tag), hunks, added, removed)
            )

    def commit_sql(self):
        """Print a single INSERT statement for the ``commits`` table.

        The row carries the commit totals.  Nothing is printed for a message
        that could not be parsed (``author`` still None).
        """
        if self.author is None:
            return
        quote = self._sql_quote
        print(
            "INSERT INTO commits (author, date, tag, files, hunks, added, "
            "removed) VALUES (%s, %s, %s, %s, %s, %s, %s);"
            % (quote(self.author), quote(self.date), quote(self.tag),
               len(self.fdata), self.hunks, self.added, self.removed)
        )
+
if __name__ == '__main__':
    # Every command-line argument is the path of one saved commit e-mail.
    # The script name is skipped by position: comparing each value against
    # sys.argv[0] would also drop a message file that has the same path.
    for path in sys.argv[1:]:
        c = commit(path)
        c.files_sql()
        c.commit_sql()
More information about the pld-cvs-commit
mailing list