Blame | Last modification | View Log | RSS feed
#!/usr/bin/perl -w######################################################################### Copyright (C) 2008 ERG Limited, All rights reserved## Module name : schemadump.pl# Module type : Standalone utility# Compiler(s) : n/a# Environment(s): windows (dos), unix (solaris and linux)## Description : Extracts a directory structure from the output of# datapump. This is useful for creating files to# be put into source control from a database.## History : Created by Haydon Knight April 2008## Usage : schemadump.pl [options] -src=<srcFile> -dest=<destDir>################################################################################################################################################# Use lines#######################################################################require 5.6.1;use strict;use warnings;use Pod::Usage; # required for help supportuse Getopt::Long;use commonExports; # see commonExports.pm in this directory######################################################################## Function prototypes#######################################################################sub startupChecks();sub parseCommandLine();sub main();sub getSubSectionLiness(\@);sub getFullDirs(\@);sub getSubDirs(@);sub writeData(\@\@);sub getFileLines($\$@);sub getFilename($@);sub writeDDLHeader(@);sub packageBodySplit(\$@);sub splitOnBlank(\$@);sub splitOnSlash(\$@);sub splitOnSemiColon(\$@);sub splitOnCreate(\$@);######################################################################## Constant global variables#######################################################################my $VERSION = "1.0.1";######################################################################## Other global variables#######################################################################my $srcFile;my $destDir;######################################################################## Main code#######################################################################parseCommandLine();startupChecks();main();finish();######################################################################## Function definitions########################################################################-------------------------------------------------------------------------------# Function : main## Purpose : Main function of this script## Arguments : none## Returns : none## Notes : Does everything but setting up at start and tearing down at end.#sub main(){my @lines = readArray( $srcFile ); # all the lines in the datapump dump filemkdir "$destDir/bookKeeping" unless -d "$destDir/bookKeeping";writeDDLHeader( @lines );# getFullDirs() returns an array of strings.# Each of these is of the form '<DIRY_1>/<DIRY_2>/.../<DIRY_N>.# e.g. one element is 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS'.my @fullDirs = getFullDirs( @lines );# getSubDirs() returns an array of strings.# Each of these is of the form '<diry_1>/<diry_2>'.# e.g. the element formed from 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS' is# 'table/index_statistics'.my @subDirs = getSubDirs( @fullDirs );# This is a 2-D array - i.e. it is an array of (references to arrays)# Each reference to an array corresponds to an element of @subDirs.# For each of these there is an array of the lines corresponding to that DDL-data-type.my @subSectionLiness = getSubSectionLiness(@lines);die "BUG! Did not get the same number of sub directories as blocks of subsection lines (" .scalar(@subDirs) . " vs " . scalar(@subSectionLiness) . ")\n"unless scalar(@subDirs) == scalar(@subSectionLiness);writeArray( "$destDir/bookKeeping/objectTypes", @fullDirs);createDirectories( $destDir, @subDirs );my $linesWritten = writeData( @subDirs, @subSectionLiness );# Minus one for the very first line: the '-- CONNECT RELEASE_MANAGER' linemy $linesDesired = scalar(@lines) - scalar(@subDirs) - 1;die "Internal logic error! Desired to write out $linesDesired lines but wrote out " ." $linesWritten lines\n"unless $linesDesired == $linesWritten;}#-------------------------------------------------------------------------------# Function : writeDDLHeader## Purpose : Writes the first line of the enormous datapump dump file to a file# This line is used by ddlfile.pl to reconstruct the dump file.## Arguments : @lines (i) - all the lines from the datapump dump file## Returns : nothing## Notes :#sub writeDDLHeader(@){my @lines = @_;die "No lines read from input file\n" unless @lines;my $connectLine = $lines[0];die "Could not read the string 'CONNECT' from first line of input file ('$connectLine')\n"unless $connectLine =~ m~^-- CONNECT\s+~;writeArray( "$destDir/bookKeeping/DDLHeader", ($connectLine) );}#-------------------------------------------------------------------------------# Function : getSubSectionLiness## Purpose : Constructs a 2D array of lines in each DDL-type# i.e. for each DDL-type (A DDL-type is a sequence or a package or a table etc)# there is an array of lines## Arguments : $refLines (i) - Reference to @lines - the list of all lines in the datapump dump file## Returns : @subSectionLiness - an array of references to arrays## Notes : Simply splits on lines containing $subSectionSplitter#sub getSubSectionLiness(\@){my ($refLines) = @_;my @subSectionLiness;my @tempList;my $isFirstTime = 1;foreach my $line (@$refLines){if( $line =~ m~^$subSectionSplitter~ ){push @subSectionLiness, [ @tempList ] unless $isFirstTime;$isFirstTime = 0;@tempList = ();}else{push @tempList, $line;}}push @subSectionLiness, [ @tempList ];logprint "Got " . scalar(@subSectionLiness) . " sets of subSection Lines";return @subSectionLiness;}#-------------------------------------------------------------------------------# Function : getFullDirs## Purpose : Constructs an array of directory paths## Arguments : $refLines (i) - reference to @lines - all lines in datapump dump file## Returns : @fullDirys - array of directory paths## Notes : Reads in directory paths from the '--new object' lines in the datapump dump file# The directory paths have extra subdirectories than what is actually mkdir'ed# Subsequent functionality (getTopBotDirs() for example) strips out these# middle subdirectories#sub getFullDirs(\@){my ($refLines) = @_;################################################################ extract out from lines starting with '-- new object'# strings of the form 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS'my @fullDirys;foreach my $line (@$refLines){next unless $line =~ m~^$subSectionSplitter~;(my $fullDiry = $line) =~ s~^$subSectionSplitter~~;push @fullDirys, $fullDiry;logprint "Got fullDiry '$fullDirys[-1]'";}logprint "Got " . scalar(@fullDirys) . " fullDirys contributing to subDirs\n";return @fullDirys;}#-------------------------------------------------------------------------------# Function : getSubDirs## Purpose : Constructs the list of subdirectories that will be mkdir'ed and filled# with .sql files## Arguments : @fullDirys (i) - list of directories parsed from datapump dump file## Returns : @subDirs - list of directories that will exist if script finishes successfully## Notes :#sub getSubDirs(@){my @fullDirys = @_;my @topDirs;my @botDirs;foreach my $fullDiry (@fullDirys){my ($topDir, $botDir) = getTopBotDirs( $fullDiry );push @topDirs, $topDir;push @botDirs, $botDir;}################################################################ combine top/bot dirsmy @subDirs;for( my $i = 0; $i < scalar(@topDirs); $i++){push @subDirs, "$topDirs[$i]/$botDirs[$i]";logprint "Pushed subDir $i: '$subDirs[-1]'";}################################################################ convert to lower caseforeach my $subDir (@subDirs){$subDir =~ tr~A-Z~a-z~;}return @subDirs;}#-------------------------------------------------------------------------------# Function : parseCommandLine## Purpose : Parses command line; invokes help if necessary## Arguments : nothing## Returns : nothing## Notes : Sets up various global variables; these are not checked here - they should be# checked in startupChecks()#sub parseCommandLine(){my $opt_help = 0;my $opt_manual = 0;my $result = GetOptions ("help" => \$opt_help, # flag"manual" => \$opt_manual, # flag"verbose+" => \$verbose, # flag"src=s" => \$srcFile, # String"dest=s" => \$destDir, # String"log=s" => \$logFile, # String);pod2usage(-verbose => 2) if( $opt_manual );pod2usage(-verbose => 0, -message => "Version: $VERSION") if ($opt_help > 0 || ! $result );}#-------------------------------------------------------------------------------# Function : startupChecks## Purpose : Checks that important variables are set sanely## Arguments : nothing## Returns : nothing## Notes : Calls die() if things aren't set sanely.# This function opens the logfile.#sub startupChecks(){die "You need to specify a source file using '-src=<srcFile>'\n" unless $srcFile;die "You need to specify an existing destination directory using '-dest=<destDir>'\n"unless $destDir;$srcFile =~ s~\\~/~g;$destDir =~ s~\\~/~g;die "Source file '$srcFile' does not exist\n" if( ! -e $srcFile || -d $srcFile );die "Destination directory '$destDir' does not exist\n" unless -d $destDir;openLog();}#-------------------------------------------------------------------------------# Function : getFileLines## Purpose : Extracts from a set of lines only those lines for each DDL-data-object.# e.g. extracts lines for a package from the lines corresponding to all packages.# This function also increments the line counter.## Arguments : $subDir (i) - The subdirectory the DDL-data-object is being created in# $refIline (io) - Reference to $iline - how far through the current lineset# we currently are. (each lineset might correspond to all# packages or all tables or all views etc)# @lines (i) - The current lineset## Returns : $addedExtraLines - boolean value; true indicates that an 'ALTER PACKAGE'# DDL-data-object was appended onto the list of lines for this# DDL-data-object. This only occurs for some 'PACKAGE_BODY'# objects in the release manager database.# @fileLines - The lines corresponding to the DDL-data-object## Notes : Invokes helper functions for each type of splitting. These functions are# tailored to the release manager database and may# need to be altered for other databases.#sub getFileLines($\$@){my ($subDir, $refIline, @lines) = @_;# Skip over blank lines$$refIline++ while( $$refIline < scalar(@lines) && $lines[$$refIline] =~ m~^\s*$~ );return (0,"end of data") if( $$refIline >= scalar(@lines) );my %fileLinesFunctions = (qq(type/type) => \&splitOnSlash,qq(function/function) => \&splitOnSlash,qq(procedure/procedure) => \&splitOnSlash,qq(package/package_body) => \&packageBodySplit,qq(package/package_spec) => \&splitOnSlash,qq(synonym/synonym) => \&splitOnBlank,qq(sequence/sequence) => \&splitOnBlank,qq(table/table) => \&splitOnBlank,qq(view/view) => \&splitOnSemiColon,qq(table/index) => \&splitOnCreate,qq(table/se_tbl_fbm_index_index) => \&splitOnCreate);die "Internal coding error: could not find a case for subdirectory" ." '$subDir' in the fileLinesFunctions hash table\n"unless $fileLinesFunctions{$subDir};return $fileLinesFunctions{$subDir}->( $refIline, @lines );}#-------------------------------------------------------------------------------# Function : splitOnBlank## Purpose : Extracts out the next DDL object from a set of lines by# adding lines until a delineating blank line is found## Arguments : $refIline (io) - reference to $iline - line to start searching on# @lines (i) - Array to search through## Returns : $addedExtraLines - always 0# @fileLines - The lines corresponding to the DDL-data-object## Notes :#sub splitOnBlank(\$@){my ($refIline, @lines) = @_;my @fileLines;# Keep adding lines until we get a blank linewhile( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~^\s*$~ ){push @fileLines, $lines[$$refIline];$$refIline++;}# Skip past the blank line$$refIline++ if $$refIline < scalar(@lines);return (0, @fileLines);}#-------------------------------------------------------------------------------# Function : splitOnCreate## Purpose : Extracts out the next DDL object from a set of lines by# searching for the next 'CREATE' keyword.## Arguments : $refIline (io) - reference to $iline - line to start searching on# @lines (i) - Array to search through## Returns : $addedExtraLines - always 0# @fileLines - The lines corresponding to the DDL-data-object## Notes :#sub splitOnCreate(\$@){my ($refIline, @lines) = @_;my @fileLines;my $gotFirstCreateLine = 0;# Keep adding lines until we reach the second line starting with 'CREATE':while( $$refIline < scalar(@lines) &&( !$gotFirstCreateLine || $lines[$$refIline] !~ m~^\s*CREATE~) ){push @fileLines, $lines[$$refIline];$gotFirstCreateLine = 1 if $lines[$$refIline] =~ m~^\s*CREATE~;$$refIline++;}return (0, @fileLines);}#-------------------------------------------------------------------------------# Function : splitOnSemiColon## Purpose : Extracts out the next DDL object from a set of lines by# adding lines until a terminating semicolon is found## Arguments : $refIline (io) - reference to $iline - line to start searching on# @lines (i) - Array to search through## Returns : $addedExtraLines - always 0# @fileLines - The lines corresponding to the DDL-data-object## Notes :#sub splitOnSemiColon(\$@){my ($refIline, @lines) = @_;my @fileLines;# Keep adding lines until we get a line ending with a semicolonwhile( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~;\s*$~ ){push @fileLines, $lines[$$refIline];$$refIline++;}# Add the line with the semicolonif( $$refIline < scalar(@lines) ){push @fileLines, $lines[$$refIline];$$refIline++;}return (0, @fileLines);}#-------------------------------------------------------------------------------# Function : splitOnSlash## Purpose : Extracts out the next DDL object from a set of lines by# adding lines until a terminating slash is found## Arguments : $refIline (io) - reference to $iline - line to start searching on# @lines (i) - Array to search through## Returns : $addedExtraLines - always 0# @fileLines - The lines corresponding to the DDL-data-object## Notes :#sub splitOnSlash(\$@){my ($refIline, @lines) = @_;my @fileLines;# Keep adding lines until we get a line starting with a slashwhile( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~^/\s*$~ ){push @fileLines, $lines[$$refIline];$$refIline++;}# Add the line with the slashif( $$refIline < scalar(@lines) ){push @fileLines, $lines[$$refIline];$$refIline++;}return (0, @fileLines);}#-------------------------------------------------------------------------------# Function : packageBodySplit## Purpose : Extracts out the next DDL object from a set of lines by# adding lines until a terminating slash is found. It then# adds extra lines from an 'ALTER PACKAGE' directive# if such a directive immediately follows the slash.## Arguments : $refIline (io) - reference to $iline - line to start searching on# @lines (i) - Array to search through## Returns : $addedExtraLines - always 0# @fileLines - The lines corresponding to the DDL-data-object## Notes :#sub packageBodySplit(\$@){my ($refIline, @lines) = @_;my ($addedExtraLines, @fileLines) = splitOnSlash($$refIline, @lines);# Keep looking for lines if there is a 'ALTER PACKAGE' in the next# slash-delimited regionif( @fileLines && $$refIline < scalar(@lines) ){my $keyWord = $fileLines[0];$keyWord =~ s~^\s*CREATE\s+PACKAGE\s+BODY\s+".*?"\."(.*?)".*~$1~;my @extraLines;my $jline = $$refIline;# Skip over blank lines$jline++ while( $jline < scalar(@lines) && $lines[$jline] =~ m~^\s*$~ );# add lines to @extraLines until we find a slashwhile( $jline < scalar(@lines) && $lines[$jline] !~ m~^/\s*$~ ){push @extraLines, $lines[$jline];$jline++;}# add in the line with the slash to @extraLinesif( $jline < scalar(@lines) ){push @extraLines, $lines[$jline];$jline++;}if( @extraLines &&$extraLines[0] =~ m~^\s*ALTER\s+PACKAGE\s+".*?"\."${keyWord}"~ ){# Yes! Add in the 'ALTER PACKAGE' text$$refIline = $jline;push @fileLines, @extraLines;$addedExtraLines = 1;}}return ($addedExtraLines, @fileLines);}#-------------------------------------------------------------------------------# Function : getFilename## Purpose : Extracts the name of the .sql file to create from its lines## Arguments : $subDir (i) - The subdirectory where the .sql file will be created# @fileLines (i) - The lines to be parsed for the filename## Returns : $keyWord - the filename to be created (doesn't have any path attached but# does have a .sql extension)## Notes : This function is tailored specifically for the release manager database, so cannot be# guaranteed to work for other databases#sub getFilename($@){my ($subDir, @fileLines) = @_;return "allData.sql" unless defined $wantToExtract{$subDir};die "Bug! No lines scanned for subDir '$subDir'\n" unless scalar(@fileLines);my $keyWord = $fileLines[0];# Extract out the word that is in the second double quotes# e.g. for the line:#CREATE SYNONYM "RELEASE_MANAGER"."APPLICATIONS" FOR "ACCESS_MANAGER"."APPLICATIONS";# extract out 'APPLICATIONS'$keyWord =~ s~^.*?".*?"\."(.*?)".*~$1~;if( $subDir eq "type/type" ){my $firstWord = $fileLines[0];# $firstWord literally stores the first word of the line# which for 'type/type' could be 'CREATE' or 'ALTER'$firstWord =~ s~^\s*(\S+).*~$1~;$keyWord .= "_$firstWord";}$keyWord .= ".sql";$keyWord =~ tr~A-Z~a-z~;logprint "getFilename() returning '$keyWord' for subdir '$subDir'";return $keyWord;}#-------------------------------------------------------------------------------# Function : writeData## Purpose : Write out files to disk## Arguments : $refSubDirs (i) - reference to @subDirs - list of subdirectories having# files created.# $refSubSectionLiness (i) - reference to @subSectionLiness (2D array of lines# per each DDL-data-type)## Returns : $linesWritten - a count of the lines written## Notes : This is a major function as it calls getFileLines() repeatedly to break# down line-list within @subSectionLiness and also performs a lot of loops.# So really does a lot more than just writing data.## This function also records to file which of the files have had 'extra' bits# written to them. (For release manager these are a few 'PACKAGE BODY' functions# that have had 'ALTER PACKAGE' added on). This is so that ddlfile.pl can break# these back into two DDL-data-objects and re-delimit them with a slash.## This function also writes out an index file of all filenames created.# This allows the order in which the files were in the original datapump dump# file to be recorded.#sub writeData(\@\@){my ($refSubDirs, $refSubSectionLiness) = @_;my $linesWritten = 0;my @filesWithExtras;my @filenamesCreated;for( my $isubdir = 0; $isubdir < scalar(@$refSubDirs); $isubdir++){# Simplify by copying the array of lines for this subdirectory into '@lines'my @lines = @{ $$refSubSectionLiness[$isubdir] };logprint "Working with subdir '$$refSubDirs[$isubdir]'";# Recall that wantToExtract hash indicates whether or not we want to# write one or many (i.e. extract them out) files to each subdirectory.if( $wantToExtract{$$refSubDirs[$isubdir]} ){logprint "Going to separate into separate files for subdir '$$refSubDirs[$isubdir]'";my $iline = 0;my %filenames;# Keep extracting out lines for files until we run outuntil( $iline >= scalar(@lines) ){my ($addedExtraLines, @fileLines) =getFileLines( $$refSubDirs[$isubdir], $iline, @lines);last if( scalar(@fileLines) == 1 && $fileLines[0] eq "end of data" );my $filename = getFilename( $$refSubDirs[$isubdir], @fileLines );my $fullname = "$destDir/$$refSubDirs[$isubdir]/$filename";push @filesWithExtras, "$$refSubDirs[$isubdir]/$filename" if $addedExtraLines;die "Duplicate filename '$fullname'" if $filenames{$filename};$filenames{$filename} = 1;writeArray( $fullname, @fileLines);push @filenamesCreated, "$$refSubDirs[$isubdir]/$filename";}}else{logprint "Writing out just one file in subdir '$$refSubDirs[$isubdir]'";writeArray( "$destDir/$$refSubDirs[$isubdir]/allData.sql", @lines);push @filenamesCreated, "$$refSubDirs[$isubdir]/allData.sql";}$linesWritten += scalar( @lines );}writeArray( "$destDir/bookKeeping/filesWithExtras", @filesWithExtras);writeArray( "$destDir/bookKeeping/orderedFiles", @filenamesCreated );return $linesWritten;}######################################################################## Documentation#######################################################################=pod=head1 NAMEschemadump.pl - creates clearcase directory structure from datapump output=head1 SYNOPSISschemadump.pl [options] -src=<srcFile> -dest=<destDir>Options:-help - brief help message-man - Full documentation-src=srcFile - File that stores the output of datapump (mandatory)-dest=destDir - Directory to unpack to (mandatory)-log=logFile - Log messages to this file=head1 OPTIONS=over 8=item B<-help>Print a brief help message and exits.=item B<-man>Prints the manual page and exits.=item B<-src=srcFile>Specify what file to extract out the clearcase files from.=item B<-dest=destDir>Specify where to unpack the clearcase files to. This must be anexisting directory.=item B<-log=logFile>Specify a file to write log messages to. Default is to just writeto the terminal.=back=head1 DESCRIPTIONThis script is used to break down the output of datapump. It isdesigned for use with the release manager database.=cut