SVN: security/cve_reader.py

Sun Oct 14 21:02:36 CEST 2007

Author: megabajt
Date: Sun Oct 14 21:02:36 2007
New Revision: 8817

Modified:
   security/cve_reader.py
Log:
- completely rewritten
 - generates XML file
 - catches CVE notes added to other revisions
 - support for mass commits
 - RSS now contains only the latest RSSITEMS items


Modified: security/cve_reader.py
==============================================================================

--- security/cve_reader.py	(original)
+++ security/cve_reader.py	Sun Oct 14 21:02:36 2007
@@ -1,9 +1,8 @@
 #!/usr/bin/python
 #
 # CVE security reader for pld-linux.org purpose
-# Basically it parses commits.log and searches for "CVE" keyword, then it generates a .html file with simple table structure
+# Basically it parses commits.log and searches for "CVE" keyword, then it generates a .xml file
 #
-# TODO
 #
 import os
 import sys
@@ -11,16 +10,27 @@
 import readline
 import time
 import datetime
+
 import PyRSS2Gen
+import xml.etree.cElementTree as ET
 
 # Changes go here
-log = '/cvsroot/SPECS/commits.log'
-cvsroot = "/cvsroot/"
-cvsmodule = "SPECS/"
-h_page = "header.html"
-table_page = "security.html"
-f_page = "footer.html"
-size_f = "size.txt"
+CVSLOG = '/cvsroot/SPECS/commits.log'
+CVSROOT = "/cvsroot/"
+CVSMODULE = "SPECS/"
+SIZEFILE = "size.txt"
+XMLFILE = "security.xml"
+
+#######################
+# RSS
+#######################
+
+RSSFILE = "rss.xml"
+
+RSSITEMS = 30
+
+#####################################################################
+
 
 # Don't change anything below unless you know what you're doing
 specs =  []
@@ -30,75 +40,123 @@
 date = []
 
 # Main parse function
-def parse():
-	# seek where we last ended parsing
-	if os.path.isfile(size_f):
-		f2 = open(size_f, 'r')
-		old_size = f2.read().split("L")
-		old_size = long(old_size[0])
-		f2.close()
+def CVSlogparse(rootnode):
+	# Seek where we last ended parsing
+	
+	# Read info about old size
+	if os.path.isfile(SIZEFILE):
+		f = open(SIZEFILE, 'r')
+		oldsize = f.read().split('L')
+		oldsize = long(oldsize[0])
+		f.close()
 	else:
-		old_size = 0
+		oldsize = 0
 	
-	f = open(log)
-	f.seek(old_size) # end seeking
+	f = open(CVSLOG)
+	f.seek(oldsize) # end seeking
 	read = f.xreadlines()
 	for l in read:
 		l = l.strip()
 		lines.append(l)
-	lines_len = len(lines)
-	for i in range(lines_len):
-		if lines[i] == "Modified files:":
-			spec = lines[i+1]
-		if lines[i] == "Log message:":
-			cvslog = 1
-			cve = ""
-			while(lines[i+cvslog] != ""):
-				cve_match = re.findall('(CVE-[0-9]+-[0-9]+)', lines[i+cvslog])
-				if cve_match:
-					for z in range(len(cve_match)):
-						cve += "<a href=\"http://cve.mitre.org/cgi-bin/cvename.cgi?name=%s\">%s</a> " % (cve_match[z], cve_match[z])
-				cvslog = cvslog+1
+	
+	i = 0
+	while(i < len(lines)):
+		# Extract spec name
+		if re.match('^Index\:.*\.spec', lines[i]):
+			spec = lines[i].split(' ')[1]
+			# Next 5 lines have nothing interesting (like "$Log$" string)
+			i = i + 5
+			continue
+
 		if lines[i] == "$Log$":
-			p = lines[i+1].split(" ")
+			cve = ""
+			cvslog = 1
+			
+			while i + cvslog < len(lines) and not re.match('^Index\:.*\.spec', lines[i + cvslog]):
+				if re.match('.*Revision.*', lines[i+cvslog]):
+					if cve != "":
+						# Save CVEs from the last revision
+						addCVEnote(rootnode, spec, cve, p[1], p[3])
+						cve = ""
+					# Set new revision data
+					p = lines[i+cvslog].split(" ")
+				else:
+					# Check whether the added line contains a CVE note
+					if re.match('^\+.*(CVE-[0-9\-]+)', lines[i+cvslog]):
+						# Good, found CVE entries. Extract them!
+						cve_list = re.findall("CVE-[0-9\-]+", lines[i+cvslog])
+						for iter in range(len(cve_list)):
+							cve += "%s " % cve_list[iter]
+				
+				cvslog = cvslog + 1
+			
 			if cve != "":
-				specs.append(spec)
-				cves.append(cve)
-				revs.append(p[1])
-				date.append(p[3])
-	f1 = open(table_page, 'w')
-	x = len(cves)-1
-	# LIFO - means, reverse the array
-	while x!=-1:
-		resolved = getCVSentry(specs[x], revs[x])
-		if resolved == 0:
-			f1.write("<tr><td>%s</td>\n" % (date[x]))
-			f1.write("<td><a href=\"http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/SPECS/%s?rev=%s\">%s</a></td>\n" % (specs[x], revs[x], specs[x]))
-			f1.write("<td>%s</td>\n" % (cves[x]))
-			f1.write("<td>%s</td>\n" % (revs[x]))
-			f1.write("<td>%s</td></tr>\n" % (revs[x]))
-		else:
-			f1.write("<tr><td>%s</td>\n" % (date[x]))
-			f1.write("<td>")
-			for i in range(len(resolved)):
-				rev_tag = resolved[i].split(":")
-				f1.write("<a href=\"http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/SPECS/%s?logsort=rev;only_with_tag=%s\">%s</a><br/>" % (specs[x], rev_tag[0], specs[x]))
-			f1.write("</td>\n")
-			f1.write("<td>%s</td>\n" % (cves[x]))
-			f1.write("<td>%s</td>\n" % (revs[x]))
-			f1.write("<td>")
-			for i in range(len(resolved)):
-				f1.write("%s<br/>" % (resolved[i]))
-			f1.write("</td></tr>\n")
-		x = x-1
-	f1.close()
-	# write new file size
+				addCVEnote(rootnode, spec, cve, p[1], p[3])
+			
+			# Don't check already checked lines
+			i = i + cvslog - 1
+			continue
+		
+		# Increase i
+		i = i + 1
+			
+	# Write new CVSLOG file size
 	size = os.fstat(f.fileno())
 	size = str(size).split(", ")
-	fs = open(size_f, "w")
+	fs = open(SIZEFILE, "w")
 	fs.write(size[6])
 	fs.close()
 
+def addCVEnote(rootnode, spec, cve, revision, date):
+	
+	resolved = ""
+
+	res = getCVSentry(spec, revision)
+
+	if res == 0:
+		resolved = revision
+	else:
+		for i in range(len(res)):
+			resolved += "%s " % res[i]
+	
+	# Generate package node
+	package = ET.Element("package")
+	ET.SubElement(package, "date").text = date
+	ET.SubElement(package, "spec").text = spec
+	ET.SubElement(package, "revision").text = revision
+	ET.SubElement(package, "resolved").text = resolved
+	ET.SubElement(package, "cves").text = cve
+
+	if len(rootnode) == 0:
+		# rootnode is empty and has no children. I can easily add new (without sorting)
+		rootnode.append(package)
+	else:
+		prevdate = ""
+		
+		# Maybe new entry can be added at the end? I need check it.
+		item = len(rootnode) - 1
+		while(item >= 0):
+			
+			subitem = getTagIndex(rootnode[item], 'date')
+			
+			prevdate = rootnode[item][subitem].text
+
+			if cmp(prevdate, date) <= 0:
+				rootnode.insert(item + 1, package)
+				return
+
+			item = item - 1
+
+		# No existing entry has an earlier or equal date, so the new entry is the oldest one
+		rootnode.insert(0, package)
+
+def getTagIndex(node, tag):
+	item = ""
+
+	for item in range(0, len(node)):
+		if node[item].tag == tag:
+			return item
+
 # get cvs log entries (auto-tags) for specs
 def getCVSentry(spec, revision):
 	tags = []
@@ -106,7 +164,7 @@
 	ac_tag = ""
 	th_tag = ""
 	ti_tag = ""
-	autotag = os.popen("cvs -d %s log -tr%s: %s%s |grep -A300 symbolic |grep auto" % (cvsroot, revision, cvsmodule, spec))
+	autotag = os.popen("cvs -d %s log -tr%s: %s%s |grep -A300 symbolic |grep auto" % (CVSROOT, revision, CVSMODULE, spec))
 	for l in autotag.xreadlines():
 		l = l.strip()
 		tags.append(l)
@@ -140,31 +198,6 @@
 def rsync():
 	os.system("rsync rsync://cvs.pld-linux.org/cvs/SPECS/commits.log .")
 
-# do I need to explain this function?
-def genPageHeader():
-	t = datetime.datetime.now()
-	EpochSeconds = time.mktime(t.timetuple())
-	now = datetime.datetime.fromtimestamp(EpochSeconds)
-	f = open(h_page, 'w')
-	f.write("<p align=\"center\">Generated on: %s<br/>\n" % now.ctime())
-	f.write("<a href=\"http://security.pld-linux.org/pld_security.xml\">Get the RSS feed!</a></p>\n")
-	f.write("<table><tr>\n")
-	f.write("<td width=\"50\"><b>Date</b></td>\n")
-	f.write("<td width=\"50\"><b>SPEC</b></td>\n")
-	f.write("<td width=\"250\"><b>CVE Entry</b></td>\n")
-	f.write("<td width=\"50\"><b>Revision</b></td>\n")
-	f.write("<td width=\"300\"><b>Resolved with</b></td>\n")
-	f.write("</tr><tr>\n")
-	f.close()
-
-# ...or this one?
-def genPageFooter():
-	f = open(f_page, 'w')
-	f.write("</tr></table>\n")
-	f.write("<p align=\"right\">\n")
-	f.write("<img src=\"http://pl.docs.pld-linux.org/zrzuty_ekr/logo_03.png\" alt=\"PLD\" /></p>\n")
-	f.close()
-
 #compares whether rev1 is greater than rev2 and returns 0 if true, 1 if false
 def compRevs(rev1, rev2):
 	rev1 = rev1.split(".")
@@ -189,32 +222,61 @@
 					break
 	return 0	
 
-def genRSS():
-	item = []
-	for i in range(len(specs)):
-		item.append(
+def getPackageData(package, taglist):
+	pkg = {}
+	
+	for i in range(len(taglist)):
+		idx = getTagIndex(package, taglist[i])
+		pkg[taglist[i]] = package[idx].text
+	
+	return pkg
+
+def genRSSFeed(rootnode):
+	rssitem = []
+
+	if len(rootnode) - RSSITEMS < 0:
+		start = 0
+	else:
+		start = len(rootnode) - RSSITEMS
+
+	for item in range(start, len(rootnode)):
+		pkg = getPackageData(rootnode[item], ['date', 'spec', 'revision', 'cves'])
+		
+		# date[0] - year; date[1] - month; date[2] - day
+		date = pkg['date'].split('/')
+
+		rssitem.append(
 			PyRSS2Gen.RSSItem(
-				title = "New CVE fixes for %s" % specs[i],
-				link = "http://security.pld-linux.org/pld_security.xml",
-				description = "%s on rev. %s resolves: %s" % (specs[i], revs[i], cves[i]),
-				guid = PyRSS2Gen.Guid("http://security.pld-linux.org/"),
-				pubDate = datetime.datetime.now()),
+				title = "New CVE fixes for %s" % pkg['spec'],
+				description = "%s on rev. %s resolves: %s" % (pkg['spec'], pkg['revision'], pkg['cves']),
+				pubDate = datetime.datetime(int(date[0]), int(date[1]), int(date[2]), 0, 0, 0)
+			)
 		)
-	rss = PyRSS2Gen.RSS2(
+	
+	RSS = PyRSS2Gen.RSS2(
 		title = "PLD Security Feed",
 		link = "http://security.pld-linux.org/",
 		description = "The latest security fixes in PLD's RPMs",
-
 		lastBuildDate = datetime.datetime.now(),
-		items = item
+		items = rssitem
 	)
-	rss.write_xml(open("www/pld_security.xml", "w"))
+
+	RSS.write_xml(open(RSSFILE, 'w'))
+
+def main():
+	if os.path.isfile(XMLFILE):
+		# Open and parse XMLFILE
+		tree = ET.parse(XMLFILE)
+		root = tree.getroot()
+	else:
+		root = ET.Element('security')
+	
+	CVSlogparse(root)
+	genRSSFeed(root)
+	
+	ET.ElementTree(root).write(XMLFILE)
 
 
 # now call them
-genPageHeader()
+main()
 #rsync()
-parse()
-genPageFooter()
-if len(specs):
-	genRSS()