# Rev 5485 (repository web-viewer header residue: Blame | Last modification | View Log | RSS feed)
########################################################################
# Copyright (c) VIX TECHNOLOGY (AUST) LTD
#
# Module name   : create_dpkgFromTar.pl
# Module type   : JATS Utility
# Compiler(s)   : Perl
# Environment(s): jats
#
# Description   : This JATS utility is used by the build system to merge
#                 build artifacts from multiple build machines into one
#                 package.
#
#                 It complements the 'tarmode' provided by create_dpkg
#
#                 It is not intended to be run by a user.
#                 It is not intended to be run directly by the build system
#                 It is intended to be run from the build daemons via a ssh session
#                 Progress is reported via stdout
#                 Exit code indicates success or error
#
# Usage         : See POD at the end of this file
#
#......................................................................#

require 5.008_002;

# Include Standard Perl Functions
#
use strict;
use warnings;

use Cwd;
use Getopt::Long;
use File::Basename;
use File::Find;
use File::Path;
use File::Copy;
use Pod::Usage;
use XML::Simple;
use Encode qw(decode encode);
use File::Temp qw/ tempfile tempdir /;

use JatsError;
use JatsEnv;
use FileUtils;
use JatsSystem;
use ArrayHashUtils;

# define Global variables
#
my $VERSION = "1.0.0";
my $PROGNAME = "create_dpkgFromTar.pl";

# Globals imported from environment
#
our $GBE_MACHTYPE;
our $GBE_HOSTNAME;
our $GBE_DPKG;
our $USER;
our $GBE_ABT;

# Global variables
#
my $tmpDirInfo;                 # File::Temp directory object; set only when we created the work dir
my $workDir;                    # Directory used for extracting fragment metadata
my $startDir;                   # Directory the program was started in
my $maxHostNameLength = 8;      # Column width used when pretty printing host names
my $pkgTargetDir;               # Final location of the merged package
my $deleteTargetDir;            # True while the target dir is only partially populated

#
# Option variables
#
my $opt_help = 0;
my $opt_manual = 0;
my $opt_verbose = 0;
my $opt_pname;
my $opt_pversion;
my $opt_srcPath;
my $opt_MergeErrors = 0;
my $opt_outputPath;
my $opt_preDelete;
my $opt_tmpDir;

#-------------------------------------------------------------------------------
# Function        : main entry point
#
# Description     : Main Entry point
#
# Inputs          :
#
# Returns         :
#

#
#   Process any command line arguments...
#   'srcdir' is accepted as an alias of 'srcpath' because the documentation
#   has historically described the option under that name.
#
my $result = GetOptions (
                'help:+'            => \$opt_help,          # flag, multiple use allowed
                'manual:3'          => \$opt_help,          # flag
                'verbose:+'         => \$opt_verbose,       # flag, multiple use allowed
                'pname=s'           => \$opt_pname,         # string
                'pversion=s'        => \$opt_pversion,      # string
                'srcpath|srcdir=s'  => \$opt_srcPath,       # string
                'mergeErrors!'      => \$opt_MergeErrors,   # [no]flag
                'output=s'          => \$opt_outputPath,    # String
                'tmpdir=s'          => \$opt_tmpDir,        # String
                'predelete!'        => \$opt_preDelete,     # [no]flag
                );

#
#   Process help and manual options
#
pod2usage(-verbose => 0, -message => "Version: $VERSION") if ($opt_help == 1 || ! $result);
pod2usage(-verbose => 1) if ($opt_help == 2 );
pod2usage(-verbose => 2) if ($opt_help > 2);

#
#   Init the error and message subsystem
#
ErrorConfig( 'name'    =>'CREATE_DPKG',
             'verbose' => $opt_verbose );

if ($opt_verbose)
{
    Verbose ("Program: $PROGNAME");
    Verbose ("Version: $VERSION");
}

#
#   Needed EnvVars
#
EnvImport ('GBE_MACHTYPE');
EnvImport ('GBE_HOSTNAME');
EnvImport ('USER' );
EnvImport ('GBE_DPKG' );
EnvImportOptional ('GBE_ABT', '');

#   Defaults
#   Paths are made absolute here, before any chdir to the work directory
InitFileUtils();
$startDir = Getcwd;
$opt_outputPath = $::GBE_DPKG unless defined $opt_outputPath;
$opt_srcPath = AbsPath($opt_srcPath) if defined $opt_srcPath;
$opt_tmpDir = AbsPath($opt_tmpDir) if defined $opt_tmpDir;

#
#   Basic sanity testing
#
Error ("Path for package fragments not specified") unless defined $opt_srcPath;
Error ("Package fragment path not found", $opt_srcPath) unless -d $opt_srcPath;
Error ("DPKG_ARCHIVE not found", $GBE_DPKG) unless -d $GBE_DPKG;
Error ("Package name not specified") unless defined $opt_pname;
Error ("Package version not specified") unless defined $opt_pversion;
Error ("Output path not specified" ) unless defined $opt_outputPath;
Error ("Output path does not exist", $opt_outputPath) unless -d $opt_outputPath;
Error ("TmpDir does not exist:", $opt_tmpDir) if (defined($opt_tmpDir) && ! -d ($opt_tmpDir));

#
#   Create a temp work directory for this
#   This will be removed on program exit
#   Not by File::Temp as it doesn't handle the case where we have chdir'd to the temp area
#
#   A user supplied work directory is used as is and is not removed on exit
#
if ($opt_tmpDir)
{
    $workDir = $opt_tmpDir;
}
else
{
    $tmpDirInfo = File::Temp->newdir( 'dpkgFromTar_XXXX', CLEANUP => 0, DIR => '/tmp' );
    $workDir = $tmpDirInfo->dirname;
}
Verbose("WorkDir", $workDir);
chdir($workDir)|| Error ("Cannot chdir to working directory: $workDir");

#
#   Locate all package fragments
#   There must be at least one
#   Package fragments are named after the package name and version and have a .tar.gz suffix
#
#   $basenameLen is the length of the '<name>_<version>_' prefix. It is used
#   below to isolate the remainder of each fragment's file name.
#
my $basename = join('_', $opt_pname, $opt_pversion);
my $basenameLen = 1 + length $basename;
$basename .= '_*.tar.gz';
my @packageFragments = glob (catfile($opt_srcPath, $basename ));
Error ("No package fragments found.", "Path: $opt_srcPath", "Glob: $basename" ) unless @packageFragments;
Message("Package Fragmnets found:", @packageFragments);

#
#   Extract the built.files.<hostname>.xml and descpkg from each of package fragments
#   Note: Use of -m flag to tar is to overcome issues with the bsdtar used under windows
#         to create the tar.gz files. It appears to insert localtime and not GMT into
#         the file.
#
#   %pkgData is keyed by the fragment path. Each entry records the name of the
#   sub directory (relative to the work directory) holding the extracted metadata.
#
my %pkgData;
foreach my $srcfile ( @packageFragments)
{
    Message ("Extracting metadata from " . StripDir($srcfile));

    #   Reduce the fragment path to the part of its file name that follows
    #   the '<name>_<version>_' prefix, with the .tar.gz suffix removed
    my $fragName = $srcfile;
    $fragName =~ s~^.*/~~;
    $fragName =~ s~\.gz$~~;
    $fragName =~ s~\.tar$~~;
    $fragName = substr($fragName, $basenameLen);
    $pkgData{$srcfile}{basename} = $fragName;

    mkpath ($fragName);
    Error ("Temp subdir $fragName not created: $!") unless -d $fragName;

    my $rv = System ('tar', '-xzmf', $srcfile,
                        IsVerbose(1) ? '-v' : undef,
                        '-C', $fragName,
                        '--wildcards', './built.files.*.xml' );
    Error("Tar extraction error: $srcfile") if ($rv);
}

#
#   Read in the XML from each of the files
#
#   %fileData is keyed by the 'fullname' of each packaged file and holds:
#       hosts   - Array of {host, md5sum} for every build that provided the file
#       md5sum  - Signature of the first instance seen
#       bad     - Present if any instance has a different signature
#
my %fileData;
foreach my $srcfile ( keys %pkgData )
{
    my @extracted = glob(catfile($pkgData{$srcfile}{basename}, 'built.files.*.xml'));
    foreach my $xmlFile ( @extracted)
    {
        my $ref = XML::Simple::XMLin($xmlFile, ForceArray => 1, KeyAttr => []);
        #DebugDumpData("REF - $xmlFile, " .ref($ref), $ref);

        foreach my $entry (@{$ref->{file}})
        {
            #   directory - no processing required
            next if $entry->{type} eq 'dir';

            #   link - no processing required
            next if $entry->{type} eq 'link';

            #   file - ensure there is no clash
            if ($entry->{type} eq 'file')
            {
                #   Calc max host name length for pretty printing
                my $hostnameLen = length $entry->{host};
                $maxHostNameLength = $hostnameLen if ($hostnameLen > $maxHostNameLength);

                my $hostEntry = {host => $entry->{host}, md5sum => $entry->{md5sum}};
                push @{$fileData{$entry->{fullname}}{hosts}}, $hostEntry;

                my $store = $fileData{$entry->{fullname}};
                if (exists $store->{md5sum})
                {
                    #   Compare existing entry and add new info
                    unless ($store->{md5sum} eq $entry->{md5sum})
                    {
                        $store->{bad} = 1;
                    }
                }
                else
                {
                    #   Create new entry
                    $store->{md5sum} = $entry->{md5sum};
                }
                next;
            }

            #   Unknown - just a warning for now
            Warning( "Unknown type:" . $entry->{fullname} . ':' . $entry->{type} );
        }
    }
}

#
#   Cleanout the non-bad entries
#   Report on merge errors
#
my $headerReported;
foreach my $entry (keys %fileData)
{
    #
    #   Some entries are allowed to differ
    #       descpkg
    #       version_*.h
    #   files as these are generated and may contain different dates and line endings
    #
    if ($entry eq 'descpkg')
    {
        delete $fileData{$entry};
        next;
    }

    if ($entry =~ m~/version[^/]*\.h$~)
    {
        Verbose("Ignore merge error on: $entry");
        delete $fileData{$entry};
        next;
    }

    #
    #   Delete entry if its not marked as bad
    unless (exists $fileData{$entry}{bad} )
    {
        delete $fileData{$entry};
        next;
    }

    #
    #   Have a merge error
    #   Detail what has happened
    #   Generate pretty output showing on which machines the files are common.
    #
    unless ($headerReported)
    {
        $headerReported = 1;
        reportMergeError('Package Merge Error. File provided by different builds are not identical');
        reportMergeError('This prevents the build from being reproducible.');
    }

    reportMergeError('File Name: ' . $entry);

#    foreach my $e ( @{$fileData{$entry}{hosts}} )
#    {
#        reportMergeError(' Provided by: ' . sprintf('%-*s',$maxHostNameLength,$e->{host}) . ' Signature: ' . $e->{md5sum});
#    }

    #   Group the providing hosts by file signature
    my %md5List;
    foreach my $e ( @{$fileData{$entry}{hosts}} ) {
        UniquePush (\@{$md5List{$e->{md5sum}}}, $e->{host});
    }

    #   For each provider, list the other hosts that delivered an identical file
    foreach my $e ( @{$fileData{$entry}{hosts}} )
    {
        my $hostList;
        my @sameHosts = @{$md5List{$e->{md5sum}}};
        ArrayDelete (\@sameHosts, $e->{host});
        if (@sameHosts) {
            $hostList = ' Same as: ' . join(', ', @sameHosts);
        } else {
            $hostList = ' Unique to: '. $e->{host};
        }

        reportMergeError(' Provided by: ' . sprintf('%-*s',$maxHostNameLength,$e->{host}) . $hostList );
    }
}
ErrorDoExit();

#
#   Calculate target package location
#
$pkgTargetDir = catdir($opt_outputPath, $opt_pname, $opt_pversion);
Verbose("Package Target: $pkgTargetDir");
RmDirTree($pkgTargetDir) if $opt_preDelete;
Error ("Target package directory exists") if -d $pkgTargetDir;
mkpath ($pkgTargetDir);
Error ("Package target not created: $!", $pkgTargetDir) unless -d $pkgTargetDir;

#   From here on the target directory is ours - the END block will remove it
#   unless the merge completes
$deleteTargetDir = 1;

#
#   Extract the archive contents and merge them into one directory
#   If there are overlaps - don't replace them
#
foreach my $srcfile ( keys %pkgData )
{
    Message ("Extracting all files from " . StripDir($srcfile));
    my $rv = System ('tar', '-xzmf', $srcfile, IsVerbose(1) ? '-v' : undef, '-C', $pkgTargetDir );
    Error("Tar extraction error: $srcfile") if ($rv);
}

#
#   Fix file permissions
#   We know we are running under unix so we will use a unix command
#
Message('Setting file permissions');
System('chmod', '-R', 'a+rx', $pkgTargetDir);

#
#   Fix descpkg file
#   Original create_dpkg uses the CopyDescpkg function. This is a bit wonky
#   All it appears to do is:
#       Force build machine name
#       Force user name
#       Force build time into the descpkg file
#   If a package was built on multiple machines then the build machine names were lost
#
#   This implementation
#       Use the descpkg file in the first package fragment
#       There is enough other information in the build system to track where the package
#       was built. This was not available when CopyDescpkg was implemented
#

#
#   All Done
#       Flag - don't cleanup generated directory
#
Information("Package Target: $pkgTargetDir");
$deleteTargetDir = 0;
exit 0;

#-------------------------------------------------------------------------------
# Function        : END
#
# Description     : Cleanup process
#                   The exit status ($?) is saved on entry and restored on
#                   exit so that nothing run during cleanup can change the
#                   exit code reported to the caller.
#
# Inputs          :
#
# Returns         :
#
END
{
    my $exitStatus = $?;

    #
    #   Delete everything in the temp directory
    #   It was a directory created by this instance for the use of this instance
    #
    if ($tmpDirInfo)
    {
        Message("Cleanup processing");

        #   Leave the work directory before trying to remove it
        chdir($startDir);
        RmDirTree($workDir);
        if (-d $workDir)
        {
            Warning("TMPDIR still exists: $workDir");
        }
    }
    elsif ($workDir)
    {
        #   Work directory was provided by the user
        Message ("Retaining workdir: $workDir");
    }

    #
    #   Delete the package target dir
    #   We must have created it - as we error if it exists.
    #
    #   Remove the packageName and packageVersion directories if possible
    #   NOTE(review): relies on StripFileExt returning the containing directory
    #                 of $pkgTargetDir - confirm against FileUtils
    #
    if ($deleteTargetDir)
    {
        Message("Remove partially created package");
        RmDirTree($pkgTargetDir);

        my $pkgDir = StripFileExt($pkgTargetDir);
        rmdir($pkgDir) && Message("Remove package dir: $pkgDir");
    }

    $? = $exitStatus;
}

#-------------------------------------------------------------------------------
# Function        : reportMergeError
#
# Description     : Report an error or a warning
#                   Merge problems are reported as warnings if the user has
#                   specified -mergeErrors, otherwise they are reported as
#                   errors via ReportError
#
# Inputs          : All arguments passed to ReportError or Warning
#
# Returns         : Nothing
#
sub reportMergeError
{
    $opt_MergeErrors ? Warning(@_) : ReportError(@_);
}

#-------------------------------------------------------------------------------
#   Documentation
#

=pod

=for htmltoc    SYSUTIL::

=head1 NAME

create_dpkgFromTar - Create a dpkg_archive entry from a set of tar files

=head1 SYNOPSIS

 jats create_dpkgFromTar [options]

 Options:
    -help               - Brief help message
    -help -help         - Detailed help message
    -man                - Full documentation
    -verbose            - Display additional progress messages
    -pname=name         - Ensure package is named correctly
    -pversion=version   - Ensure package version is correct
    -srcpath=path       - Location of the package fragments
    -srcdir=path        - Alias for -srcpath

 Debug and Testing:
    -[no]mergeErrors    - Allow merge errors
    -[no]predelete      - Predelete generated package
    -output=path        - Base of test package archive
    -tmpdir=path        - Use (and retain) an existing work directory

=head1 OPTIONS

=over 8

=item B<-help>

Print a brief help message and exits.

=item B<-help -help>

Print a detailed help message with an explanation for each option.

=item B<-man>

Prints the manual page and exits.

=item B<-srcpath=path>

This option specifies the path of the packages fragments. The fragments will be
located using the package name and package version.

The option may also be given as B<-srcdir=path>.

=item B<-pname=name>

The name of the target package

=item B<-pversion=version>

The version of the target package.

=item B<-[no]mergeErrors>

This option allows the merging process to continue if merge errors are located.
The default is -noMergeErrors

This option is intended for testing use only.

=item B<-[no]predelete>

This option will delete the target package instance before the package is assembled.
The default is -noPreDelete

This option is intended for testing use only.

=item B<-output=path>

This option allows the user to specify the root of a test package archive.
The default is to use the value provided by GBE_DPKG - the main package archive.

This option is intended for testing use only.

=item B<-tmpdir=path>

This option specifies an existing directory to be used as the work directory
when extracting package metadata. The directory will not be deleted when the
program exits.

The default is to create a temporary directory under /tmp and to delete it
when the program exits.

This option is intended for testing use only.

=back

=head1 DESCRIPTION

This utility program is used by the build system to merge build artifacts from several
build machines into one package.

The build artifacts have been delivered to the package store as a collection
of zipped tar files (.tar.gz). There will be one tar file from each machine in the build set.

The process has been designed to overcome several problems:

=over 4

=item Speed

If some of the build machines are not co-located with the master package server, then
the process of transferring a package with a large number of files can be very slow.

ie: > 1 second per file to transfer a file from AWS(Sydney) to PCC(Perth).
If a package has several thousand files then this can take an hour.

If the packaged files are compressed into a single file, then the file creation overhead is eliminated.

=item Atomic File Creation

For package fragments to be transferred from multiple machines without error some form of
multi-machine mutex is required. This has not been successfully implemented - after many attempts.

If the merge operation is done by the package server, then there is no need for a mutex.

=back

The process of transferring tarballs and then merging them in one location solves these two problems.

The reconstruction process is performed by a daemon on the package archive server to address the following issues:

=over 4

=item * Windows handling of symlinks

Symbolic links will be handled correctly on the package server as the file system is native.

=item * Network Speed

By running the merge on the package server the contents of the package are not dragged to and
from the build server. If the build server is not co-located with the package archive then there
will be a major speed penalty.

=back

The basic process performed by this utility is:

=over 4

=item *

Locate all parts of the package. There should be one from each build machine that is a part
of the build set, unless the build was generic. For each package fragment:

=over 4

=item *

Extract a 'built.files.<machname>' file - the file must exist.

=item *

Read all 'built.files.<machname>' files and in the process determine if there are any conflicts.
A conflict is deemed to exist if the files have different MD5 digests. This allows the same file
to be provided by different builds - as long as the content is the same. Line endings are handled
in a machine independent manner.

=item *

Detect dead symbolic links.

=back

=item *

If there are no file conflicts or other detected errors, then all parts of the package will be
extracted into a single directory.

=item *

File permissions will be adjusted. All files and directories will be made world readable and
world executable.

=back

=cut