SVN: packages.php/plough.php
vip
vip at pld-linux.org
Fri Dec 28 23:22:03 CET 2007
Author: vip
Date: Fri Dec 28 23:22:03 2007
New Revision: 9166
Added:
packages.php/plough.php (contents, props changed)
Log:
- script for gathering packages data
Added: packages.php/plough.php
==============================================================================
--- (empty file)
+++ packages.php/plough.php Fri Dec 28 23:22:03 2007
@@ -0,0 +1,193 @@
+#!/usr/bin/php.cli
+<?php
+/*
+ * This script ploughs through the XML files and puts the information
+ * into the SQL database (for now - MySQL).
+ *
+ * GPL, Piotr Budny, vip at pld-linux.org
+ *
+ */
+
+// Shared database handle used (via `global`) by every function below.
+$mysqli = new mysqli("localhost", "mysql", "", "packages");
+
+// Stop right away when the connection failed; otherwise every later query
+// would fail on a dead handle with a much less helpful message.
+if (mysqli_connect_errno())
+{
+    fwrite(STDERR, "[!] cannot connect to MySQL: " . mysqli_connect_error() . "\n");
+    exit(1);
+}
+
+/**
+ * Read every row of the `module` table.
+ *
+ * Terminates the script with the MySQL error text when the query fails.
+ *
+ * @return array map of module_id => url
+ */
+function fill_source_list()
+{
+    global $mysqli;
+
+    $result = $mysqli->query("SELECT `module_id`, `url` FROM `module`");
+    if ($result === false)
+    {
+        // query() returns false on failure; calling fetch_assoc() on it
+        // would be a fatal error, so report the real cause instead.
+        die($mysqli->error);
+    }
+
+    $sources = array();
+    while ($row = $result->fetch_assoc())
+    {
+        $sources[$row['module_id']] = $row['url'];
+    }
+    $result->free();
+
+    return $sources;
+}
+
+/**
+ * Look up the id of a package by its full identity.
+ *
+ * Terminates the script with the MySQL error text when the query fails.
+ *
+ * @param string $name    package name
+ * @param mixed  $module  module id
+ * @param mixed  $epoch   package epoch
+ * @param string $version package version
+ * @param string $release package release
+ * @return mixed the `package_id` column value when exactly one row matches,
+ *               false otherwise
+ */
+function find_package_id($name, $module, $epoch, $version, $release)
+{
+    global $mysqli;
+
+    // Escape every value: names and versions come from repository metadata
+    // and may contain quotes that would otherwise break the statement.
+    $query = sprintf(
+        "SELECT `package_id` FROM `package` WHERE `name`='%s' AND `module_id`='%s' AND `epoch`='%s' AND `version`='%s' AND `release`='%s'",
+        $mysqli->real_escape_string((string) $name),
+        $mysqli->real_escape_string((string) $module),
+        $mysqli->real_escape_string((string) $epoch),
+        $mysqli->real_escape_string((string) $version),
+        $mysqli->real_escape_string((string) $release)
+    );
+
+    $result = $mysqli->query($query);
+    if ($result === false)
+    {
+        // mysql_error() belongs to the legacy mysql extension and knows
+        // nothing about this mysqli connection; ask the handle itself.
+        die($mysqli->error);
+    }
+
+    $id = false;
+    if ($result->num_rows === 1)
+    {
+        $row = $result->fetch_row();
+        $id = $row[0];
+    }
+    $result->free();
+
+    return $id;
+}
+
+/**
+ * Look up the id of a module by distribution, architecture and name.
+ *
+ * Terminates the script with the MySQL error text when the query fails.
+ *
+ * @param mixed  $dist distribution id
+ * @param mixed  $arch architecture id
+ * @param string $name module name
+ * @return mixed the `module_id` column value when exactly one row matches,
+ *               false otherwise
+ */
+function find_module_id($dist, $arch, $name)
+{
+    global $mysqli;
+
+    // Escape every value before it is placed inside the quoted literals.
+    $query = sprintf(
+        "SELECT `module_id` FROM `module` WHERE `name`='%s' AND `arch_id`='%s' AND `dist_id`='%s'",
+        $mysqli->real_escape_string((string) $name),
+        $mysqli->real_escape_string((string) $arch),
+        $mysqli->real_escape_string((string) $dist)
+    );
+
+    $result = $mysqli->query($query);
+    if ($result === false)
+    {
+        // mysql_error() belongs to the legacy mysql extension and knows
+        // nothing about this mysqli connection; ask the handle itself.
+        die($mysqli->error);
+    }
+
+    $id = false;
+    if ($result->num_rows === 1)
+    {
+        $row = $result->fetch_row();
+        $id = $row[0];
+    }
+    $result->free();
+
+    return $id;
+}
+
+/**
+ * Map a distribution name to its numeric id.
+ *
+ * Placeholder implementation: $name is not consulted and the id is
+ * always 1.
+ *
+ * @param string $name distribution name (currently unused)
+ * @return int always 1
+ */
+function find_dist_id($name)
+{
+    return 1;
+}
+
+/**
+ * Map an architecture name to its numeric id.
+ *
+ * Placeholder implementation: $name is not consulted and the id is
+ * always 1.
+ *
+ * @param string $name architecture name (currently unused)
+ * @return int always 1
+ */
+function find_arch_id($name)
+{
+    return 1;
+}
+
+/**
+ * Empty the `file` and `package` tables before a fresh import.
+ *
+ * Terminates the script when a TRUNCATE fails, because importing on top
+ * of stale rows would leave the tables in a mixed state.
+ */
+function clean_database()
+{
+    global $mysqli;
+
+    foreach (array("file", "package") as $table)
+    {
+        if ($mysqli->query("TRUNCATE $table") === false)
+        {
+            die("[!] cannot truncate $table: " . $mysqli->error . "\n");
+        }
+    }
+}
+
+/**
+ * Import package metadata from "primary.xml" in the current directory.
+ *
+ * For every <package> element one row is inserted into `package` and one
+ * row (INSERT IGNORE) into `description`. All inserts run in a single
+ * transaction that is committed at the end.
+ *
+ * Values are sent through prepared statements, so quotes in names, URLs
+ * or descriptions cannot break or alter the SQL.
+ *
+ * Nothing is imported when the module cannot be resolved or the XML file
+ * cannot be parsed; a message is written to STDERR instead.
+ */
+function parse_primary()
+{
+    global $mysqli;
+
+    $dir = "ready"; // TODO: gather from ftp dir
+    $dist = "th";
+    $arch = "i686";
+
+    $module = find_module_id(find_dist_id($dist), find_arch_id($arch), $dir);
+    if ($module === false)
+    {
+        // Inserting packages with an empty module id would only create
+        // orphaned rows.
+        fwrite(STDERR, "  [!] module '$dir' not found, primary.xml skipped\n");
+        return;
+    }
+
+    $metadata = simplexml_load_file("primary.xml");
+    if ($metadata === false)
+    {
+        fwrite(STDERR, "  [!] cannot parse primary.xml\n");
+        return;
+    }
+
+    // Prepare both statements once; only the bound values change per package.
+    $insert_package = $mysqli->prepare("INSERT INTO package VALUES (?, ?, ?, ?, ?, ?, FROM_UNIXTIME(?), FROM_UNIXTIME(?), ?, ?, ?)");
+    $insert_description = $mysqli->prepare("INSERT IGNORE INTO description VALUES (?, ?, ?, ?)");
+    if ($insert_package === false || $insert_description === false)
+    {
+        die($mysqli->error);
+    }
+
+    // bind_param() binds by reference: assigning to these variables inside
+    // the loop is enough to change what execute() sends.
+    $insert_package->bind_param(
+        "ssssssiisss",
+        $checksum, $name, $module, $epoch, $ver, $rel,
+        $file_time, $build_time,
+        $size_package, $size_installed, $size_archive
+    );
+    $insert_description->bind_param("ssss", $name, $summary, $url, $description);
+
+    $mysqli->autocommit(false);
+
+    $failed = 0;
+    $last_error = "";
+
+    foreach ($metadata->package as $package)
+    {
+//      if(strrpos($package->name, "-debuginfo"))
+//          continue;
+
+        $checksum = (string) $package->checksum;
+        $name = (string) $package->name;
+        $epoch = (string) $package->version['epoch'];
+        $ver = (string) $package->version['ver'];
+        $rel = (string) $package->version['rel'];
+        $file_time = (int) (string) $package->time['file'];
+        $build_time = (int) (string) $package->time['build'];
+        // Sizes stay strings so large values survive on 32-bit builds.
+        $size_package = (string) $package->size['package'];
+        $size_installed = (string) $package->size['installed'];
+        $size_archive = (string) $package->size['archive'];
+
+        if (!$insert_package->execute())
+        {
+            $failed++;
+            $last_error = $insert_package->error;
+        }
+
+        $summary = (string) $package->summary;
+        $url = (string) $package->url;
+        $description = (string) $package->description;
+
+        if (!$insert_description->execute())
+        {
+            $failed++;
+            $last_error = $insert_description->error;
+        }
+    }
+
+    $mysqli->commit();
+
+    $insert_package->close();
+    $insert_description->close();
+
+    if ($failed > 0)
+    {
+        // One summary line instead of one line per row keeps the log usable.
+        fwrite(STDERR, "  [!] $failed insert(s) failed, last error: $last_error\n");
+    }
+}
+
+/**
+ * Import file lists from "filelists.xml" in the current directory.
+ *
+ * For every <file> entry of every <package> element one row is inserted
+ * into `file`, holding the package's `pkgid` attribute and the file entry's
+ * text. All inserts run in a single transaction committed at the end.
+ *
+ * Values are sent through a prepared statement, so paths containing quotes
+ * or backslashes are stored intact.
+ *
+ * Nothing is imported when the XML file cannot be parsed; a message is
+ * written to STDERR instead.
+ */
+function parse_filelists()
+{
+    global $mysqli;
+
+    $filelists = simplexml_load_file("filelists.xml");
+    if ($filelists === false)
+    {
+        fwrite(STDERR, "  [!] cannot parse filelists.xml\n");
+        return;
+    }
+
+    // Prepared once; only the two bound values change per row.
+    $insert_file = $mysqli->prepare("INSERT INTO file VALUES (?, ?)");
+    if ($insert_file === false)
+    {
+        die($mysqli->error);
+    }
+    // Bound by reference: the loop below just reassigns $pkgid and $path.
+    $insert_file->bind_param("ss", $pkgid, $path);
+
+    $mysqli->autocommit(false);
+
+    $failed = 0;
+    $last_error = "";
+
+    foreach ($filelists->package as $package)
+    {
+        $pkgid = (string) $package['pkgid'];
+
+        foreach ($package->file as $file)
+        {
+            $path = (string) $file;
+
+            if (!$insert_file->execute())
+            {
+                $failed++;
+                $last_error = $insert_file->error;
+            }
+        }
+    }
+
+    $mysqli->commit();
+    $insert_file->close();
+
+    if ($failed > 0)
+    {
+        // One summary line instead of one line per row keeps the log usable.
+        fwrite(STDERR, "  [!] $failed insert(s) failed, last error: $last_error\n");
+    }
+}
+
+// Driver: read the module list, wipe the previously imported rows, then run
+// both importers once for every module.
+print "[*] gathering sources...\n";
+$sources = fill_source_list();
+
+print "[*] cleaning...\n";
+clean_database();
+
+// NOTE(review): parse_primary() and parse_filelists() take no arguments, so
+// each iteration reads the same primary.xml / filelists.xml regardless of
+// $module_id and $url.
+foreach ($sources as $module_id => $url)
+{
+    print "[*] $module_id ($url)\n";
+
+    print " [+] primary\n";
+    parse_primary();
+
+    print " [+] filelists\n";
+    parse_filelists();
+}
+?>
More information about the pld-cvs-commit
mailing list