From 3994d539918501cf7308f4b54218acbe09049223 Mon Sep 17 00:00:00 2001
From: Callan Barrett <wizzomafizzo@gmail.com>
Date: Tue, 17 Jun 2008 04:53:37 +0800
Subject: [PATCH] Parsing PKGBUILDs with bash script

Parse PKGBUILDs with a bash script instead of the hand-rolled PHP parser, and
rename some variables so they work better with it.

Signed-off-by: Callan Barrett <wizzomafizzo@gmail.com>
---
 web/html/pkgsubmit.php     |  281 +++++++++++++++++---------------------------
 web/utils/parsepkgbuild.sh |   43 +++++++
 2 files changed, 150 insertions(+), 174 deletions(-)
 create mode 100755 web/utils/parsepkgbuild.sh

diff --git a/web/html/pkgsubmit.php b/web/html/pkgsubmit.php
index 65706e8..7e04923 100644
--- a/web/html/pkgsubmit.php
+++ b/web/html/pkgsubmit.php
@@ -12,7 +12,7 @@ include("pkgfuncs.inc");    # package functions
 
 set_lang();                 # this sets up the visitor's language
 check_sid();                # see if they're still logged in
-html_header("Submit");
+html_header(__("Submit"));
 
 ?>
 
@@ -36,37 +36,35 @@ if ($_COOKIE["AURSID"]):
 			$error = __("Error - No file uploaded");
 		}
 
-		# Temporary dir to put the tarball contents
+    # Set up directories and put tarball/pkgbuild contents in it
 		$tempdir = UPLOAD_DIR . uid_from_sid($_COOKIE['AURSID']) . time();
 
-		if (!$error) {
-			if (!@mkdir($tempdir)) {
-				$error = __("Could not create incoming directory: %s.",
-					array($tempdir));
-			} else {
-				if (!@chdir($tempdir)) {
-					$error = __("Could not change directory to %s.",
-						array($tempdir));
-				} else {
-				  if ($_FILES['pfile']['name'] == "PKGBUILD") {
-				    move_uploaded_file($_FILES['pfile']['tmp_name'], $tempdir . "/PKGBUILD");
-				  } else {
-  					$tar = new Archive_Tar($_FILES['pfile']['tmp_name']);
-  					$extract = $tar->extract();
-  					
-  					if (!$extract) {
-  						$error = __("Unknown file format for uploaded file.");
-  					}
-				  }
-				}
-			}
+		if (!$error && !@mkdir($tempdir)) {
+			$error = __("Could not create incoming directory: %s.", $tempdir);
+		}
+		
+		if (!$error && !@chdir($tempdir)) {
+			$error = __("Could not change directory to %s.", $tempdir);
 		}
+		
+	  if (!$error && $_FILES['pfile']['name'] == "PKGBUILD") {
+	    move_uploaded_file($_FILES['pfile']['tmp_name'], $tempdir . "/PKGBUILD");
+	  } else if (!$error) {
+			$tar = new Archive_Tar($_FILES['pfile']['tmp_name']);
+			$extract = $tar->extract();
+			
+			if (!$extract) {
+				$error = __("Unknown file format for uploaded file.");
+			}
+	  }
 
 		# Find the PKGBUILD
 		if (!$error) {
 		  $pkgbuild = File_Find::search('PKGBUILD', $tempdir);
 		  
 		  if (count($pkgbuild)) {
+		    # We'll make the assumption that the first PKGBUILD we find is the
+		    # right one and the directory it's in is our base directory
 		    $pkgbuild = $pkgbuild[0];
 		    $pkg_dir = dirname($pkgbuild);
 		  } else {
@@ -74,143 +72,84 @@ if ($_COOKIE["AURSID"]):
 		  }
 		}
 
-		# if no error, get list of directory contents and process PKGBUILD
-		# TODO: This needs to be completely rewritten to support stuff like arrays
-		# and variable substitution among other things.
 		if (!$error) {
-			# process PKGBIULD - remove line concatenation
-			#
-			$pkgbuild = array();
-			$fp = fopen($pkg_dir."/PKGBUILD", "r");
-			$line_no = 0;
-			$lines = array();
-			$continuation_line = 0;
-			$current_line = "";
-			while (!feof($fp)) {
-				$line = trim(fgets($fp));
-				$char_counts = count_chars($line, 0);
-				if (substr($line, strlen($line)-1) == "\\") {
-					# continue appending onto existing line_no
-					#
-					$current_line .= substr($line, 0, strlen($line)-1);
-					$continuation_line = 1;
-				} elseif ($char_counts[ord('(')] > $char_counts[ord(')')]) {
-					# assumed continuation
-					# continue appending onto existing line_no
-					#
-					$current_line .= $line . " ";
-					$continuation_line = 1;
-				} else {
-					# maybe the last line in a continuation, or a standalone line?
-					#
-					if ($continuation_line) {
-						# append onto existing line_no
-						#
-						$current_line .= $line;
-						$lines[$line_no] = $current_line;
-						$current_line = "";
-					} else {
-						# it's own line_no
-						#
-						$lines[$line_no] = $line;
-					}
-					$continuation_line = 0;
-					$line_no++;
-				}
-			}
-			fclose($fp);
-
-			# Now process the lines and put any var=val lines into the
-			# 'pkgbuild' array.	Also check to make sure it has the build()
-			# function.
-			#
-			$seen_build_function = 0;
-			while (list($k, $line) = each($lines)) {
-				$lparts = explode("=", $line, 2);
-				if (count($lparts) == 2) {
-					# this is a variable/value pair, strip out
-					# array parens and any quoting, except in pkgdesc
-					# for pkgdesc, only remove start/end pairs of " or '
-					if ($lparts[0]=="pkgdesc") {
-						if ($lparts[1]{0} == '"' && 
-								$lparts[1]{strlen($lparts[1])-1} == '"') {
-							$pkgbuild[$lparts[0]] = substr($lparts[1], 1, -1);
-						}
-					 	elseif 
-							($lparts[1]{0} == "'" && 
-							 $lparts[1]{strlen($lparts[1])-1} == "'") {
-							$pkgbuild[$lparts[0]] = substr($lparts[1], 1, -1);
-						} else { 
-							$pkgbuild[$lparts[0]] = $lparts[1];
-					 	}
-					} else {
-						$pkgbuild[$lparts[0]] = str_replace(array("(",")","\"","'"), "",
-								$lparts[1]);
-					}
-				} else {
-					# either a comment, blank line, continued line, or build function
-					#
-					if (substr($lparts[0], 0, 5) == "build") {
-						$seen_build_function = 1;
-					}
-				}
-			}
-
-			# some error checking on PKGBUILD contents - just make sure each
-			# variable has a value.	This does not do any validity checking
-			# on the values, or attempts to fix line continuation/wrapping.
+		  # Run the parse script from the PKGBUILD's own directory: its restricted
+      # shell cannot source a path with a slash, and tarballs may nest the file
+      $lines = array();
+      $parse_script = $_SERVER['DOCUMENT_ROOT'] . '/../utils/parsepkgbuild.sh';
+      $prev_dir = getcwd();
+      if (@chdir($pkg_dir)) {
+        exec('bash ' . escapeshellarg($parse_script) . ' PKGBUILD', $lines);
+        chdir($prev_dir);
+      }
+
+      # Parse the script output: pacman database style blocks made of a
+      # %NAME% header, one value per line and a closing blank line
+      $vars_allowed = array('PKGNAME', 'PKGVER', 'PKGREL', 'PKGDESC', 'URL', 'LICENSE', 'ARCH', 'DEPENDS', 'SOURCE');
+      # Start every list empty so unset ones (e.g. depends) can still be iterated
+      $pkgbuild = array();
+      foreach ($vars_allowed as $varname) {
+        $pkgbuild[strtolower($varname)] = array();
+      }
+
+      # Only the first 50 lines of output are looked at
+      $max = min(count($lines), 50);
+      $i = 0;
+      while ($i < $max) {
+        $var = preg_match('/^%(.+)%$/', $lines[$i], $match);
+        if ($var == 1 && in_array($match[1], $vars_allowed, true)) {
+          $varname = strtolower($match[1]);
+          $pkgbuild[$varname] = array();
+          $i++;
+          while ($i < $max && strlen($lines[$i]) > 0) {
+            $pkgbuild[$varname][] = $lines[$i];
+            $i++;
+          }
+        }
+        $i++;
+      }
+
+      # There should only ever be one value for each of these; take the
+      # first one parsed, or null when the PKGBUILD did not provide it
+      foreach (array('pkgname', 'pkgver', 'pkgrel', 'pkgdesc', 'url') as $varname) {
+        $pkgbuild[$varname] = isset($pkgbuild[$varname][0]) ? $pkgbuild[$varname][0] : null;
+      }
+
+			# Check that the required values aren't empty
 			#
-			if (!$seen_build_function) {
-				$error = __("Missing build function in PKGBUILD.");
-			}
+			# TODO: This can no longer check if a variable exists, only if it's
+			# empty or not, there is also no way to check if the build function
+			# exists. If it's required you could just reuse how the AUR used
+			# to check PKGBUILDs to reimplement this stuff.
 			
-			$req_vars = array("md5sums", "source", "url", "pkgdesc", "license", "pkgrel", "pkgver", "arch", "pkgname");
+		  $req_vars = array("url", "pkgdesc", "pkgrel", "pkgver", "pkgname");
 			foreach ($req_vars as $var) {
-  			if (!array_key_exists($var, $pkgbuild)) {
+  			if (!isset($pkgbuild[$var]) || $pkgbuild[$var] === '') {
   				$error = __("Missing " . $var . " variable in PKGBUILD.");
   			}
 		  }
+		  
+		  if (!count($pkgbuild['license'])) {
+		    $error = __("Missing license variable in PKGBUILD.");
+		  }
+		  
+		  if (!count($pkgbuild['arch'])) {
+		    $error = __("Missing arch variable in PKGBUILD.");
+		  }
 		}
 
-		# TODO This is where other additional error checking can be
-		# performed.	Examples: #md5sums == #sources?, md5sums of any
-		# included files match?, install scriptlet file exists?
-		#
-		
-		# Check for http:// or other protocol in url
-		# 
+	  # Validate package url
 		if (!$error) {
 			$parsed_url = parse_url($pkgbuild['url']);
 			if (!$parsed_url['scheme']) {
 				$error = __("Package URL is missing a protocol (ie. http:// ,ftp://)");
 			}
 		}
-			
-		# Now, run through the pkgbuild array and do any $pkgname/$pkgver
-		# substituions.
-		#
-		# TODO: run through and do ALL substitutions, to cover custom vars
-		if (!$error) {
-			$pkgname_var = $pkgbuild["pkgname"];
-			$pkgver_var = $pkgbuild["pkgver"];
-			$new_pkgbuild = array();
-			while (list($k, $v) = each($pkgbuild)) {
-				$v = str_replace('$pkgname', $pkgname_var, $v);
-				$v = str_replace('${pkgname}', $pkgname_var, $v);
-				$v = str_replace('$pkgver', $pkgver_var, $v);
-				$v = str_replace('${pkgver}', $pkgver_var, $v);
-				$new_pkgbuild[$k] = $v;
-			}
-		}
-
-		# Now we've parsed the pkgbuild, let's move it to where it belongs
+		
+		# Validate package name
 		if (!$error) {
-			$pkg_name = str_replace("'", "", $pkgbuild['pkgname']);
-			$pkg_name = escapeshellarg($pkg_name);
-			$pkg_name = str_replace("'", "", $pkg_name);
-            
+		  $pkg_name = $pkgbuild['pkgname'];
 			$presult = preg_match("/^[a-z0-9][a-z0-9\.+_-]*$/", $pkg_name);
-			
 			if (!$presult) {
 				$error = __("Invalid name: only lowercase letters are allowed.");
 			}
@@ -221,7 +160,6 @@ if ($_COOKIE["AURSID"]):
 			$pkg_exists = package_exists($pkg_name);
 			if (can_submit_pkg($pkg_name, $_COOKIE["AURSID"])) {
 				if (file_exists(INCOMING_DIR . $pkg_name)) {
-					# Blow away the existing file/dir and contents
 					rm_rf(INCOMING_DIR . $pkg_name);
 				}
 
@@ -244,8 +182,9 @@ if ($_COOKIE["AURSID"]):
 		# Re-tar the package for consistency's sake
 		if (!$error) {
 			if (!@chdir(INCOMING_DIR . $pkg_name)) {
-				$error = __("Could not change directory to %s.",
-					array(INCOMING_DIR . $pkg_name));
+				$error = __("Could not change directory to %s."
+				           , INCOMING_DIR . $pkg_name
+				           );
 			}
 		}
 		
@@ -272,7 +211,10 @@ if ($_COOKIE["AURSID"]):
 			# needs to be preserved so that any votes are retained.	However,
 			# PackageDepends and PackageSources can be purged.
 			
-			$q = "SELECT * FROM Packages WHERE Name = '" . mysql_real_escape_string($new_pkgbuild['pkgname']) . "'";
+			# Licenses are meant to be arrays but are stored as one string in the AUR
+			$pkgbuild['license'] = implode(' ', $pkgbuild['license']);
+			
+			$q = "SELECT * FROM Packages WHERE Name = '" . mysql_real_escape_string($pkgbuild['pkgname']) . "'";
 			$result = db_query($q, $dbh);
 			$pdata = mysql_fetch_assoc($result);
 
@@ -307,12 +249,12 @@ if ($_COOKIE["AURSID"]):
 				
 				# Update package data
 				$q = sprintf( "UPDATE Packages SET ModifiedTS = UNIX_TIMESTAMP(), Name = '%s', Version = '%s-%s', License = '%s', Description = '%s', URL = '%s', LocationID = 2, FSPath = '%s', URLPath = '%s', OutOfDate = 0 WHERE ID = %d"
-				            , mysql_real_escape_string($new_pkgbuild['pkgname'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgver'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgrel'])
-				            , mysql_real_escape_string($new_pkgbuild['license'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgdesc'])
-				            , mysql_real_escape_string($new_pkgbuild['url'])
+				            , mysql_real_escape_string($pkgbuild['pkgname'])
+				            , mysql_real_escape_string($pkgbuild['pkgver'])
+				            , mysql_real_escape_string($pkgbuild['pkgrel'])
+				            , mysql_real_escape_string($pkgbuild['license'])
+				            , mysql_real_escape_string($pkgbuild['pkgdesc'])
+				            , mysql_real_escape_string($pkgbuild['url'])
 				            , mysql_real_escape_string(INCOMING_DIR . $pkg_name . "/" . $pkg_name . ".tar.gz")
 				            , mysql_real_escape_string(URL_DIR . $pkg_name . "/" . $pkg_name . ".tar.gz")
 				            , $pdata["ID"]
@@ -321,14 +263,11 @@ if ($_COOKIE["AURSID"]):
 				db_query($q, $dbh);
 
 				# Update package depends
-				$depends = explode(" ", $new_pkgbuild['depends']);
-        foreach ($depends as $dep) {
+        foreach ($pkgbuild['depends'] as $dep) {
 					$q = "INSERT INTO PackageDepends (PackageID, DepPkgID, DepCondition) VALUES (";
 					$deppkgname = preg_replace("/[<>]?=.*/", "", $dep);
           $depcondition = str_replace($deppkgname, "", $dep);
-                    
-          if ($deppkgname == "#") { break; }
-                    
+
 					$deppkgid = create_dummy($deppkgname, $_COOKIE['AURSID']);
           $q .= $pdata["ID"] . ", " . $deppkgid . ", '" . mysql_real_escape_string($depcondition) . "')";
 
@@ -336,8 +275,7 @@ if ($_COOKIE["AURSID"]):
 				}
 
 				# Insert sources
-				$sources = explode(" ", $new_pkgbuild['source']);
-				foreach ($sources as $src) {
+				foreach ($pkgbuild['source'] as $src) {
 					$q = "INSERT INTO PackageSources (PackageID, Source) VALUES (";
 					$q .= $pdata["ID"] . ", '" . mysql_real_escape_string($src) . "')";
 					db_query($q, $dbh);
@@ -347,13 +285,13 @@ if ($_COOKIE["AURSID"]):
 			  
 				# This is a brand new package
 				$q = sprintf( "INSERT INTO Packages (Name, License, Version, CategoryID, Description, URL, LocationID, SubmittedTS, SubmitterUID, MaintainerUID, FSPath, URLPath) VALUES ('%s', '%s', '%s-%s', %d, '%s', '%s', 2, UNIX_TIMESTAMP(), %d, %d, '%s', '%s')"
-				            , mysql_real_escape_string($new_pkgbuild['pkgname'])
-				            , mysql_real_escape_string($new_pkgbuild['license'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgver'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgrel'])
+				            , mysql_real_escape_string($pkgbuild['pkgname'])
+				            , mysql_real_escape_string($pkgbuild['license'])
+				            , mysql_real_escape_string($pkgbuild['pkgver'])
+				            , mysql_real_escape_string($pkgbuild['pkgrel'])
 				            , mysql_real_escape_string($_REQUEST['category'])
-				            , mysql_real_escape_string($new_pkgbuild['pkgdesc'])
-				            , mysql_real_escape_string($new_pkgbuild['url'])
+				            , mysql_real_escape_string($pkgbuild['pkgdesc'])
+				            , mysql_real_escape_string($pkgbuild['url'])
 				            , uid_from_sid($_COOKIE["AURSID"])
 				            , uid_from_sid($_COOKIE["AURSID"])
 				            , mysql_real_escape_string(INCOMING_DIR . $pkg_name . "/" . $pkg_name . ".tar.gz")
@@ -364,14 +302,11 @@ if ($_COOKIE["AURSID"]):
 				$packageID = mysql_insert_id($dbh);
 
 				# Update package depends
-				$depends = explode(" ", $new_pkgbuild['depends']);
-				foreach ($depends as $dep) {
+				foreach ($pkgbuild['depends'] as $dep) {
 					$q = "INSERT INTO PackageDepends (PackageID, DepPkgID, DepCondition) VALUES (";
 					$deppkgname = preg_replace("/[<>]?=.*/", "", $dep);
 					$depcondition = str_replace($deppkgname, "", $dep);
                     
-          if ($deppkgname == "#") { break; }
-          
           $deppkgid = create_dummy($deppkgname, $_COOKIE['AURSID']);
           $q .= $packageID . ", " . $deppkgid . ", '" . mysql_real_escape_string($depcondition) . "')";
         
@@ -379,8 +314,7 @@ if ($_COOKIE["AURSID"]):
 				}
 
 				# Insert sources
-				$sources = explode(" ", $new_pkgbuild['source']);
-				foreach ($sources as $src) {
+				foreach ($pkgbuild['source'] as $src) {
 					$q = "INSERT INTO PackageSources (PackageID, Source) VALUES (";
 					$q .= $packageID . ", '" . mysql_real_escape_string($src) . "')";
 					db_query($q, $dbh);
@@ -388,8 +322,6 @@ if ($_COOKIE["AURSID"]):
 			  
 			}
 		}
-
-		chdir($_SERVER['DOCUMENT_ROOT']);
 	}
 
 
@@ -488,6 +420,7 @@ endif;
 </div>
 
 <?php
+chdir($_SERVER['DOCUMENT_ROOT']);
 html_footer(AUR_VERSION);
 # vim: ts=2 sw=2 noet ft=php
 ?>
diff --git a/web/utils/parsepkgbuild.sh b/web/utils/parsepkgbuild.sh
new file mode 100755
index 0000000..cba7354
--- /dev/null
+++ b/web/utils/parsepkgbuild.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Print selected PKGBUILD variables as "%NAME%" blocks (pacman db style).
+# Usage: parsepkgbuild.sh FILE  (bare name: restricted bash won't source a/b)
+if [ ! -e "$1" ]; then
+    exit 1
+fi
+
+# Cap CPU time and empty PATH before handing over to a restricted shell.
+ulimit -t 1
+export PATH=''
+# The quoted 'EOF' stops this shell from expanding the script below; the
+# file name reaches the inner shell as its first positional parameter.
+exec /bin/bash --noprofile --norc --restricted -s -- "$1" << 'EOF'
+
+source "$1"
+
+# emit NAME [VALUE...]: print "%NAME%", each non-empty VALUE on its own
+# line, then a blank separator. Prints nothing if the first VALUE is empty.
+emit() {
+	local header=$1 value
+	shift
+	[ -n "$1" ] || return 0
+	printf '%%%s%%\n' "$header"
+	for value in "$@"; do
+		# printf '%s' prints the value verbatim (no escapes, no option parsing)
+		[ -n "$value" ] && printf '%s\n' "$value"
+	done
+	echo ""
+}
+
+emit PKGNAME "$pkgname"
+emit PKGVER "$pkgver"
+emit PKGREL "$pkgrel"
+emit PKGDESC "$pkgdesc"
+emit URL "$url"
+
+# Quoted [@] expansions keep each element intact: no word splitting/globbing.
+emit LICENSE "${license[@]}"
+emit ARCH "${arch[@]}"
+emit DEPENDS "${depends[@]}"
+emit SOURCE "${source[@]}"
+
+EOF
-- 
1.5.5.3

