SVN: packages.php/plough.php

vip vip at pld-linux.org
Fri Dec 28 23:22:03 CET 2007


Author: vip
Date: Fri Dec 28 23:22:03 2007
New Revision: 9166

Added:
   packages.php/plough.php   (contents, props changed)
Log:
- script for gathering packages data


Added: packages.php/plough.php
==============================================================================
--- (empty file)
+++ packages.php/plough.php	Fri Dec 28 23:22:03 2007
@@ -0,0 +1,193 @@
+#!/usr/bin/php.cli
+<?php
+/*
+ * This script ploughs through the XML files and puts the information
+ * into the SQL database (for now - MySQL).
+ *
+ * GPL, Piotr Budny, vip at pld-linux.org
+ *
+ */
+
+// Shared database connection; the functions below reach it via `global $mysqli`.
+$mysqli = new mysqli("localhost", "mysql", "", "packages");
+
+// Depending on the mysqli error-reporting mode, a failed connect does not
+// throw, so check explicitly; otherwise the first query fails with a far
+// less helpful message.
+if(mysqli_connect_errno())
+{
+	fwrite(STDERR, "[!] cannot connect to MySQL: " . mysqli_connect_error() . "\n");
+	exit(1);
+}
+
+/**
+ * Build the list of module sources from the `module` table.
+ *
+ * Terminates the script with the server's error message if the query fails.
+ *
+ * @return array map of module_id => url, one entry per row (empty when the
+ *               table has no rows)
+ */
+function fill_source_list()
+{
+	global $mysqli;
+
+	$sources = array();
+
+	// query() returns false on failure; calling fetch_assoc() on that would be
+	// a fatal error, so stop here with the actual database error instead.
+	$result = $mysqli->query("SELECT `module_id`, `url` FROM `module`") or die($mysqli->error);
+
+	while($field = $result->fetch_assoc())
+	{
+		$sources[$field['module_id']] = $field['url'];
+	}
+
+	// The rows have been copied into $sources; release the result set.
+	$result->free();
+
+	return $sources;
+}
+
+/**
+ * Look up the `package_id` of the row in `package` matching the given
+ * name, module and epoch/version/release.
+ *
+ * Terminates the script with the server's error message if the query fails.
+ *
+ * @param string $name    value compared against `name`
+ * @param mixed  $module  value compared against `module_id`
+ * @param mixed  $epoch   value compared against `epoch`
+ * @param string $version value compared against `version`
+ * @param string $release value compared against `release`
+ * @return mixed the package_id when exactly one row matches, false otherwise
+ */
+function find_package_id($name, $module, $epoch, $version, $release)
+{
+	global $mysqli;
+
+	// Escape every value before it is spliced into the statement, so a quote
+	// or backslash in the data cannot break (or rewrite) the query.
+	$q = sprintf(
+		"SELECT `package_id` FROM `package` WHERE `name`='%s' AND `module_id`='%s' AND `epoch`='%s' AND `version`='%s' AND `release`='%s'",
+		$mysqli->real_escape_string((string)$name),
+		$mysqli->real_escape_string((string)$module),
+		$mysqli->real_escape_string((string)$epoch),
+		$mysqli->real_escape_string((string)$version),
+		$mysqli->real_escape_string((string)$release)
+	);
+
+	// Report the error from this mysqli connection; mysql_error() belongs to
+	// the separate mysql extension and does not see this connection.
+	$result = $mysqli->query($q) or die($mysqli->error);
+
+	// Zero rows or an ambiguous match both count as "not found".
+	if($result->num_rows !== 1)
+	{
+		return false;
+	}
+
+	$row = $result->fetch_row();
+	return $row[0];
+}
+
+/**
+ * Look up the `module_id` of the row in `module` matching the given
+ * distribution id, architecture id and module name.
+ *
+ * Terminates the script with the server's error message if the query fails.
+ *
+ * @param mixed  $dist value compared against `dist_id`
+ * @param mixed  $arch value compared against `arch_id`
+ * @param string $name value compared against `name`
+ * @return mixed the module_id when exactly one row matches, false otherwise
+ */
+function find_module_id($dist, $arch, $name)
+{
+	global $mysqli;
+
+	// Escape every value before it is spliced into the statement, so a quote
+	// or backslash in the data cannot break (or rewrite) the query.
+	$q = sprintf(
+		"SELECT `module_id` FROM `module` WHERE `name`='%s' AND `arch_id`='%s' AND `dist_id`='%s'",
+		$mysqli->real_escape_string((string)$name),
+		$mysqli->real_escape_string((string)$arch),
+		$mysqli->real_escape_string((string)$dist)
+	);
+
+	// Report the error from this mysqli connection; mysql_error() belongs to
+	// the separate mysql extension and does not see this connection.
+	$result = $mysqli->query($q) or die($mysqli->error);
+
+	// Zero rows or an ambiguous match both count as "not found".
+	if($result->num_rows !== 1)
+	{
+		return false;
+	}
+
+	$row = $result->fetch_row();
+	return $row[0];
+}
+
+/**
+ * Resolve a distribution name to its id.
+ *
+ * Currently a stub: $name is not consulted and the id is always 1.
+ *
+ * @param string $name distribution name (ignored)
+ * @return int always 1
+ */
+function find_dist_id($name)
+{
+	$dist_id = 1;
+
+	return $dist_id;
+}
+
+/**
+ * Resolve an architecture name to its id.
+ *
+ * Currently a stub: $name is not consulted and the id is always 1.
+ *
+ * @param string $name architecture name (ignored)
+ * @return int always 1
+ */
+function find_arch_id($name)
+{
+	$arch_id = 1;
+
+	return $arch_id;
+}
+
+/**
+ * Empty the `file` and `package` tables ahead of a fresh import.
+ *
+ * Terminates the script with the server's error message if a TRUNCATE
+ * fails, so that an import never runs on top of stale rows unnoticed.
+ */
+function clean_database()
+{
+	global $mysqli;
+
+	// Same order as before: files first, then packages.
+	foreach(array("file", "package") as $table)
+	{
+		$mysqli->query("TRUNCATE $table") or die($mysqli->error);
+	}
+}
+
+/**
+ * Import package metadata from "primary.xml" (read from the current working
+ * directory) into the `package` and `description` tables.
+ *
+ * The target module is currently hard-coded (dist "th", arch "i686",
+ * dir "ready"). All inserts are issued with autocommit switched off and
+ * committed once at the end. A failed insert does not stop the import; the
+ * number of failures is printed afterwards.
+ */
+function parse_primary()
+{
+	global $mysqli;
+
+	$dir = "ready"; // TODO: gather from ftp dir
+	$dist = "th";
+	$arch = "i686";
+
+	$module = find_module_id(find_dist_id($dist), find_arch_id($arch), $dir);
+
+	// simplexml_load_file() returns false for a missing or malformed file;
+	// there is nothing to import in that case.
+	$metadata = simplexml_load_file("primary.xml");
+	if($metadata === false)
+	{
+		echo "   [!] cannot load primary.xml\n";
+		return;
+	}
+
+	$mysqli->autocommit(false);
+
+	$failed = 0;
+
+	foreach($metadata->package as $package)
+	{
+// 		if(strrpos($package->name, "-debuginfo"))
+// 			continue;
+
+		$version = $package->version->Attributes();
+		$time = $package->time->Attributes();
+		$size = $package->size->Attributes();
+
+		// Every string taken from the XML is escaped for this connection
+		// before it goes into the statement. The two timestamps are forced
+		// to integers because they are placed unquoted inside FROM_UNIXTIME().
+		$q = sprintf(
+			"INSERT INTO package VALUES ('%s','%s','%s','%s','%s','%s', FROM_UNIXTIME(%d), FROM_UNIXTIME(%d),'%s','%s','%s')",
+			$mysqli->real_escape_string((string)$package->checksum),
+			$mysqli->real_escape_string((string)$package->name),
+			$mysqli->real_escape_string((string)$module),
+			$mysqli->real_escape_string((string)$version->epoch),
+			$mysqli->real_escape_string((string)$version->ver),
+			$mysqli->real_escape_string((string)$version->rel),
+			(int)(string)$time->file,
+			(int)(string)$time->build,
+			$mysqli->real_escape_string((string)$size->package),
+			$mysqli->real_escape_string((string)$size->installed),
+			$mysqli->real_escape_string((string)$size->archive)
+		);
+		if(!$mysqli->query($q))
+		{
+			$failed++;
+		}
+
+		// INSERT IGNORE: an already existing description row is kept as is.
+		$q = sprintf(
+			"INSERT IGNORE INTO description VALUES ('%s','%s','%s','%s')",
+			$mysqli->real_escape_string((string)$package->name),
+			$mysqli->real_escape_string((string)$package->summary),
+			$mysqli->real_escape_string((string)$package->url),
+			$mysqli->real_escape_string((string)$package->description)
+		);
+		if(!$mysqli->query($q))
+		{
+			$failed++;
+		}
+	}
+	$mysqli->commit();
+
+	// Make rejected statements visible without aborting the whole import.
+	if($failed > 0)
+	{
+		echo "   [!] $failed insert(s) failed, last error: " . $mysqli->error . "\n";
+	}
+}
+
+/**
+ * Import per-package file lists from "filelists.xml" (read from the current
+ * working directory) into the `file` table, one row per <file> element.
+ *
+ * All inserts are issued with autocommit switched off and committed once at
+ * the end. A failed insert does not stop the import; the number of failures
+ * is printed afterwards.
+ */
+function parse_filelists()
+{
+	global $mysqli;
+
+	// simplexml_load_file() returns false for a missing or malformed file;
+	// there is nothing to import in that case.
+	$filelists = simplexml_load_file("filelists.xml");
+	if($filelists === false)
+	{
+		echo "   [!] cannot load filelists.xml\n";
+		return;
+	}
+
+	$mysqli->autocommit(false);
+
+	$failed = 0;
+
+	// TODO: for very large file lists, consider committing every N rows
+	// instead of once at the end.
+	foreach($filelists->package as $package)
+	{
+		// The pkgid is shared by every file of the package: escape it once.
+		$pkgid = $mysqli->real_escape_string((string)$package->Attributes()->pkgid);
+
+		foreach($package->file as $file)
+		{
+			// Paths can contain quotes or backslashes; unescaped, such a
+			// path would break the statement and the row would be lost.
+			$path = $mysqli->real_escape_string((string)$file);
+
+			if(!$mysqli->query("INSERT INTO file VALUES ('$pkgid','$path')"))
+			{
+				$failed++;
+			}
+		}
+	}
+	$mysqli->commit();
+
+	// Make rejected statements visible without aborting the whole import.
+	if($failed > 0)
+	{
+		echo "   [!] $failed insert(s) failed, last error: " . $mysqli->error . "\n";
+	}
+}
+
+// Main flow: read the module list, wipe the previously imported data, then
+// run both importers once per module.
+echo "[*] gathering sources...\n";
+$sources = fill_source_list();
+
+echo "[*] cleaning...\n";
+clean_database();
+
+// NOTE(review): parse_primary() and parse_filelists() take no arguments, so
+// each pass reads the same primary.xml / filelists.xml regardless of the
+// module being reported here.
+foreach(array_keys($sources) as $module_id)
+{
+	$url = $sources[$module_id];
+
+	echo "[*] " . $module_id . " (" . $url . ")\n";
+
+	echo "   [+] primary\n";
+	parse_primary();
+
+	echo "   [+] filelists\n";
+	parse_filelists();
+}
+?>


More information about the pld-cvs-commit mailing list