Subversion Repositories DevTools

Rev

Blame | Last modification | View Log | RSS feed

#!/usr/bin/perl -w

########################################################################
# Copyright (C) 2008 ERG Limited, All rights reserved
#
# Module name   : schemadump.pl
# Module type   : Standalone utility
# Compiler(s)   : n/a
# Environment(s): windows (dos), unix (solaris and linux)
#
# Description   : Extracts a directory structure from the output of
#                 datapump.  This is useful for creating files to
#                 be put into source control from a database.
#
# History       : Created by Haydon Knight April 2008
#
# Usage         : schemadump.pl [options] -src=<srcFile> -dest=<destDir>
#
########################################################################

#######################################################################
# Use lines
#######################################################################
require 5.6.1;
use strict;
use warnings;
use Pod::Usage;                             # required for help support
use Getopt::Long;

use commonExports; # see commonExports.pm in this directory

#######################################################################
# Function prototypes
#######################################################################

sub startupChecks();
sub parseCommandLine();
sub main();

sub getSubSectionLiness(\@);
sub getFullDirs(\@);
sub getSubDirs(@);
sub writeData(\@\@);
sub getFileLines($\$@);
sub getFilename($@);
sub writeDDLHeader(@);

sub packageBodySplit(\$@);
sub splitOnBlank(\$@);
sub splitOnSlash(\$@);
sub splitOnSemiColon(\$@);
sub splitOnCreate(\$@);

#######################################################################
# Constant global variables
#######################################################################

my $VERSION = "1.0.1";

#######################################################################
# Other global variables
#######################################################################

my $srcFile;
my $destDir;

#######################################################################
# Main code
#######################################################################

parseCommandLine();
startupChecks();
main();
finish();

#######################################################################
# Function definitions
#######################################################################

#-------------------------------------------------------------------------------
# Function  : main
#
# Purpose   : Main function of this script
#
# Arguments : none
#
# Returns   : none
#
# Notes     : Does everything but setting up at start and tearing down at end.
#
sub main()
{
    my @lines = readArray( $srcFile ); # all the lines in the datapump dump file

    mkdir "$destDir/bookKeeping" unless -d "$destDir/bookKeeping";

    writeDDLHeader( @lines );

    # getFullDirs() returns an array of strings.
    # Each of these is of the form '<DIRY_1>/<DIRY_2>/.../<DIRY_N>.
    # e.g. one element is 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS'. 
    my @fullDirs = getFullDirs( @lines ); 

    # getSubDirs() returns an array of strings.
    # Each of these is of the form '<diry_1>/<diry_2>'.
    # e.g. the element formed from 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS' is
    # 'table/index_statistics'.
    my @subDirs = getSubDirs( @fullDirs );

    # This is a 2-D array - i.e. it is an array of (references to arrays)
    # Each reference to an array corresponds to an element of @subDirs.
    # For each of these there is an array of the lines corresponding to that DDL-data-type.
    my @subSectionLiness = getSubSectionLiness(@lines);

    die "BUG!  Did not get the same number of sub directories as blocks of subsection lines (" .
        scalar(@subDirs) . " vs " . scalar(@subSectionLiness) . ")\n"
            unless scalar(@subDirs) == scalar(@subSectionLiness);

    writeArray( "$destDir/bookKeeping/objectTypes", @fullDirs);
    createDirectories( $destDir, @subDirs );

    my $linesWritten = writeData( @subDirs, @subSectionLiness );

    # Minus one for the very first line: the '-- CONNECT RELEASE_MANAGER' line
    my $linesDesired = scalar(@lines) - scalar(@subDirs) - 1;

    die "Internal logic error!  Desired to write out $linesDesired lines but wrote out " . 
        " $linesWritten lines\n"
    unless $linesDesired == $linesWritten;
}


#-------------------------------------------------------------------------------
# Function  : writeDDLHeader
#
# Purpose   : Writes the first line of the enormous datapump dump file to a file
#             This line is used by ddlfile.pl to reconstruct the dump file.
#
# Arguments : @lines (i) - all the lines from the datapump dump file
#
# Returns   : nothing
#
# Notes     :
#
sub writeDDLHeader(@)
{
    my @lines = @_;

    die "No lines read from input file\n" unless @lines;

    my $connectLine = $lines[0];

    die "Could not read the string 'CONNECT' from first line of input file ('$connectLine')\n"
        unless $connectLine =~ m~^-- CONNECT\s+~;
    
    writeArray( "$destDir/bookKeeping/DDLHeader", ($connectLine) );   
}


#-------------------------------------------------------------------------------
# Function  : getSubSectionLiness
#
# Purpose   : Constructs a 2D array of lines in each DDL-type
#             i.e. for each DDL-type (A DDL-type is a sequence or a package or a table etc)
#             there is an array of lines
#
# Arguments : $refLines (i) - Reference to @lines - the list of all lines in the datapump dump file
#
# Returns   : @subSectionLiness - an array of references to arrays
#
# Notes     : Simply splits on lines containing $subSectionSplitter 
#
sub getSubSectionLiness(\@)
{
    my ($refLines) = @_;
    
    my @subSectionLiness;
    my @tempList;
    my $isFirstTime = 1;

    foreach my $line (@$refLines)
    {
        if( $line =~ m~^$subSectionSplitter~ )
        {
            push @subSectionLiness, [ @tempList ] unless $isFirstTime;
            $isFirstTime = 0;
            @tempList = ();
        }
        else
        {
            push @tempList, $line;
        }
    }
    push @subSectionLiness, [ @tempList ];
    
    logprint "Got " . scalar(@subSectionLiness) . " sets of subSection Lines";

    return @subSectionLiness;
}


#-------------------------------------------------------------------------------
# Function  : getFullDirs
#
# Purpose   : Constructs an array of directory paths
#
# Arguments : $refLines (i) - reference to @lines - all lines in datapump dump file
#
# Returns   : @fullDirys - array of directory paths
#
# Notes     : Reads in directory paths from the '--new object' lines in the datapump dump file
#             The directory paths have extra subdirectories than what is actually mkdir'ed
#             Subsequent functionality (getTopBotDirs() for example) strips out these
#             middle subdirectories
#
sub getFullDirs(\@)
{
    my ($refLines) = @_;

    ###############################################################
    # extract out from lines starting with '-- new object'
    # strings of the form 'TABLE/INDEX/STATISTICS/INDEX_STATISTICS'
    my @fullDirys;

    foreach my $line (@$refLines)
    {
        next unless $line =~ m~^$subSectionSplitter~;
        (my $fullDiry = $line) =~ s~^$subSectionSplitter~~;
        push @fullDirys, $fullDiry;

        logprint "Got fullDiry '$fullDirys[-1]'";
    }

    logprint "Got " . scalar(@fullDirys)  . " fullDirys contributing to subDirs\n";

    return @fullDirys;
}


#-------------------------------------------------------------------------------
# Function  : getSubDirs
#
# Purpose   : Constructs the list of subdirectories that will be mkdir'ed and filled
#             with .sql files
#
# Arguments : @fullDirys (i) - list of directories parsed from datapump dump file
#
# Returns   : @subDirs - list of directories that will exist if script finishes successfully
#
# Notes     :
#
sub getSubDirs(@)
{
    my @fullDirys = @_;

    my @topDirs;
    my @botDirs;

    foreach my $fullDiry (@fullDirys)
    {
        my ($topDir, $botDir) = getTopBotDirs( $fullDiry );

        push @topDirs, $topDir;
        push @botDirs, $botDir;
    }

    ###############################################################
    # combine top/bot dirs
    my @subDirs;

    for( my $i = 0; $i < scalar(@topDirs); $i++)
    {
        push @subDirs, "$topDirs[$i]/$botDirs[$i]";
        logprint "Pushed subDir $i: '$subDirs[-1]'";
    }

    ###############################################################
    # convert to lower case
    foreach my $subDir (@subDirs)
    {
        $subDir =~ tr~A-Z~a-z~;
    }

    return @subDirs;
}


#-------------------------------------------------------------------------------
# Function  : parseCommandLine
#
# Purpose   : Parses command line; invokes help if necessary
#
# Arguments : nothing
#
# Returns   : nothing
#
# Notes     : Sets up various global variables; these are not checked here - they should be
#             checked in startupChecks()
#
sub parseCommandLine()
{
    my $opt_help = 0;
    my $opt_manual = 0;

    my $result = GetOptions (
    "help"          => \$opt_help,              # flag
    "manual"        => \$opt_manual,            # flag
    "verbose+"      => \$verbose,               # flag
    "src=s"         => \$srcFile,               # String
    "dest=s"        => \$destDir,               # String
    "log=s"         => \$logFile,               # String
    );

    pod2usage(-verbose => 2) if( $opt_manual );
    pod2usage(-verbose => 0, -message => "Version: $VERSION") if ($opt_help > 0 || ! $result );
}


#-------------------------------------------------------------------------------
# Function  : startupChecks
#
# Purpose   : Checks that important variables are set sanely
#
# Arguments : nothing
#
# Returns   : nothing
#
# Notes     : Calls die() if things aren't set sanely.
#             This function opens the logfile.
#
sub startupChecks()
{
    die "You need to specify a source file using '-src=<srcFile>'\n" unless $srcFile;
    die "You need to specify an existing destination directory using '-dest=<destDir>'\n"
        unless $destDir;

    $srcFile =~ s~\\~/~g;
    $destDir =~ s~\\~/~g;

    die "Source file '$srcFile' does not exist\n" if( ! -e $srcFile || -d $srcFile );
    die "Destination directory '$destDir' does not exist\n" unless -d $destDir;

    openLog();
}


#-------------------------------------------------------------------------------
# Function  : getFileLines
#
# Purpose   : Extracts from a set of lines only those lines for each DDL-data-object.
#             e.g. extracts lines for a package from the lines corresponding to all packages.
#             This function also increments the line counter.
#
# Arguments : $subDir (i) - The subdirectory the DDL-data-object is being created in
#             $refIline (io) - Reference to $iline - how far through the current lineset
#                              we currently are.  (each lineset might correspond to all
#                              packages or all tables or all views etc)
#             @lines (i)     - The current lineset
#
# Returns   : $addedExtraLines - boolean value; true indicates that an 'ALTER PACKAGE'
#                                DDL-data-object was appended onto the list of lines for this
#                                DDL-data-object.  This only occurs for some 'PACKAGE_BODY'
#                                objects in the release manager database.
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : Invokes helper functions for each type of splitting.  These functions are
#             tailored to the release manager database and may
#             need to be altered for other databases.
#
sub getFileLines($\$@)
{
    my ($subDir, $refIline, @lines) = @_;

    # Skip over blank lines
    $$refIline++ while( $$refIline < scalar(@lines) && $lines[$$refIline] =~ m~^\s*$~ );

    return (0,"end of data") if( $$refIline >= scalar(@lines) );

    my %fileLinesFunctions = (
        qq(type/type)                    => \&splitOnSlash,
        qq(function/function)            => \&splitOnSlash,
        qq(procedure/procedure)          => \&splitOnSlash,
        qq(package/package_body)         => \&packageBodySplit,
        qq(package/package_spec)         => \&splitOnSlash,
        qq(synonym/synonym)              => \&splitOnBlank,
        qq(sequence/sequence)            => \&splitOnBlank,
        qq(table/table)                  => \&splitOnBlank,
        qq(view/view)                    => \&splitOnSemiColon,
        qq(table/index)                  => \&splitOnCreate,
        qq(table/se_tbl_fbm_index_index) => \&splitOnCreate);
    
    die "Internal coding error: could not find a case for subdirectory" .
        " '$subDir' in the fileLinesFunctions hash table\n"
        unless $fileLinesFunctions{$subDir};

    return $fileLinesFunctions{$subDir}->( $refIline, @lines );
}


#-------------------------------------------------------------------------------
# Function  : splitOnBlank
#
# Purpose   : Extracts out the next DDL object from a set of lines by
#             adding lines until a delineating blank line is found
#
# Arguments : $refIline (io) - reference to $iline - line to start searching on
#             @lines (i) - Array to search through
#
# Returns   : $addedExtraLines - always 0
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : 
#
sub splitOnBlank(\$@)
{
    my ($refIline, @lines) = @_;

    my @fileLines;

    # Keep adding lines until we get a blank line
    while( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~^\s*$~ )
    {
        push @fileLines, $lines[$$refIline];
        $$refIline++;
    }
    
    # Skip past the blank line
    $$refIline++ if $$refIline < scalar(@lines);

    return (0, @fileLines);
}


#-------------------------------------------------------------------------------
# Function  : splitOnCreate
#
# Purpose   : Extracts out the next DDL object from a set of lines by
#             searching for the next 'CREATE' keyword.
#
# Arguments : $refIline (io) - reference to $iline - line to start searching on
#             @lines (i) - Array to search through
#
# Returns   : $addedExtraLines - always 0
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : 
#
sub splitOnCreate(\$@)
{
   my ($refIline, @lines) = @_;

   my @fileLines;

   my $gotFirstCreateLine = 0;

   # Keep adding lines until we reach the second line starting with 'CREATE':
   while( $$refIline < scalar(@lines) &&
          ( !$gotFirstCreateLine || $lines[$$refIline] !~ m~^\s*CREATE~) )
   {
       push @fileLines, $lines[$$refIline];
       $gotFirstCreateLine = 1 if $lines[$$refIline] =~ m~^\s*CREATE~;
       $$refIline++;
   } 

   return (0, @fileLines);
}


#-------------------------------------------------------------------------------
# Function  : splitOnSemiColon
#
# Purpose   : Extracts out the next DDL object from a set of lines by
#             adding lines until a terminating semicolon is found
#
# Arguments : $refIline (io) - reference to $iline - line to start searching on
#             @lines (i) - Array to search through
#
# Returns   : $addedExtraLines - always 0
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : 
#
sub splitOnSemiColon(\$@)
{
    my ($refIline, @lines) = @_;

    my @fileLines;

    # Keep adding lines until we get a line ending with a semicolon
    while( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~;\s*$~ )
    {
        push @fileLines, $lines[$$refIline];
        $$refIline++;
    }

    # Add the line with the semicolon
    if( $$refIline < scalar(@lines) )
    {
        push @fileLines, $lines[$$refIline];
        $$refIline++;
    }

    return (0, @fileLines);
}


#-------------------------------------------------------------------------------
# Function  : splitOnSlash
#
# Purpose   : Extracts out the next DDL object from a set of lines by
#             adding lines until a terminating slash is found
#
# Arguments : $refIline (io) - reference to $iline - line to start searching on
#             @lines (i) - Array to search through
#
# Returns   : $addedExtraLines - always 0
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : 
#
sub splitOnSlash(\$@)
{
    my ($refIline, @lines) = @_;

    my @fileLines;

    # Keep adding lines until we get a line starting with a slash
    while( $$refIline < scalar(@lines) && $lines[$$refIline] !~ m~^/\s*$~ )
    {
        push @fileLines, $lines[$$refIline];
        $$refIline++;
    }

    # Add the line with the slash
    if( $$refIline < scalar(@lines) )
    {
        push @fileLines, $lines[$$refIline];
        $$refIline++;
    }

    return (0, @fileLines);
}


#-------------------------------------------------------------------------------
# Function  : packageBodySplit
#
# Purpose   : Extracts out the next DDL object from a set of lines by
#             adding lines until a terminating slash is found.  It then
#             adds extra lines from an 'ALTER PACKAGE' directive
#             if such a directive immediately follows the slash.
#
# Arguments : $refIline (io) - reference to $iline - line to start searching on
#             @lines (i) - Array to search through
#
# Returns   : $addedExtraLines - always 0
#             @fileLines - The lines corresponding to the DDL-data-object
#
# Notes     : 
#
sub packageBodySplit(\$@)
{
    my ($refIline, @lines) = @_;
   
    my ($addedExtraLines, @fileLines) = splitOnSlash($$refIline, @lines);

    # Keep looking for lines if there is a 'ALTER PACKAGE' in the next
    # slash-delimited region
    if( @fileLines && $$refIline < scalar(@lines) )
    {
        my $keyWord = $fileLines[0];
        $keyWord =~ s~^\s*CREATE\s+PACKAGE\s+BODY\s+".*?"\."(.*?)".*~$1~;
        
        my @extraLines;
        my $jline = $$refIline;
        
        # Skip over blank lines
        $jline++ while( $jline < scalar(@lines) && $lines[$jline] =~ m~^\s*$~ );

        # add lines to @extraLines until we find a slash
        while( $jline < scalar(@lines) && $lines[$jline] !~ m~^/\s*$~ )
        {
            push @extraLines, $lines[$jline];
            $jline++;
        }
        
        # add in the line with the slash to @extraLines
        if( $jline < scalar(@lines) )
        {
            push @extraLines, $lines[$jline];
            $jline++;
        }
        
        if( @extraLines && 
            $extraLines[0] =~ m~^\s*ALTER\s+PACKAGE\s+".*?"\."${keyWord}"~ )
        {
            # Yes!  Add in the 'ALTER PACKAGE' text
            $$refIline = $jline;
            push @fileLines, @extraLines;
            $addedExtraLines = 1;
        }
    }

    return ($addedExtraLines, @fileLines); 
}


#-------------------------------------------------------------------------------
# Function  : getFilename
#
# Purpose   : Extracts the name of the .sql file to create from its lines
#
# Arguments : $subDir (i) - The subdirectory where the .sql file will be created
#             @fileLines (i) - The lines to be parsed for the filename
#
# Returns   : $keyWord - the filename to be created (doesn't have any path attached but
#             does have a .sql extension)
#
# Notes     : This function is tailored specifically for the release manager database, so cannot be
#             guaranteed to work for other databases
#
sub getFilename($@)
{
    my ($subDir, @fileLines) = @_;

    return "allData.sql" unless defined $wantToExtract{$subDir};

    die "Bug!  No lines scanned for subDir '$subDir'\n" unless scalar(@fileLines);

    my $keyWord = $fileLines[0];

    # Extract out the word that is in the second double quotes
    # e.g. for the line:
    #CREATE SYNONYM "RELEASE_MANAGER"."APPLICATIONS" FOR "ACCESS_MANAGER"."APPLICATIONS";
    # extract out 'APPLICATIONS'
    $keyWord =~ s~^.*?".*?"\."(.*?)".*~$1~;

    if( $subDir eq "type/type" )
    {
        my $firstWord = $fileLines[0];
        # $firstWord literally stores the first word of the line
        # which for 'type/type' could be 'CREATE' or 'ALTER'
        $firstWord =~ s~^\s*(\S+).*~$1~;
        $keyWord .= "_$firstWord";
    }

    $keyWord .= ".sql";
    $keyWord =~ tr~A-Z~a-z~;
    
    logprint "getFilename() returning '$keyWord' for subdir '$subDir'";

    return $keyWord;
}


#-------------------------------------------------------------------------------
# Function  : writeData
#
# Purpose   : Write out files to disk
#
# Arguments : $refSubDirs (i) - reference to @subDirs - list of subdirectories having
#                               files created.
#             $refSubSectionLiness (i) - reference to @subSectionLiness (2D array of lines
#                                        per each DDL-data-type)
#
# Returns   : $linesWritten - a count of the lines written                           
#
# Notes     : This is a major function as it calls getFileLines() repeatedly to break
#             down line-list within @subSectionLiness and also performs a lot of loops.
#             So really does a lot more than just writing data.
#
#             This function also records to file which of the files have had 'extra' bits
#             written to them.  (For release manager these are a few 'PACKAGE BODY' functions
#             that have had 'ALTER PACKAGE' added on).  This is so that ddlfile.pl can break
#             these back into two DDL-data-objects and re-delimit them with a slash.
#
#             This function also writes out an index file of all filenames created.
#             This allows the order in which the files were in the original datapump dump
#             file to be recorded.
#
sub writeData(\@\@)
{
    my ($refSubDirs, $refSubSectionLiness) = @_;
    
    my $linesWritten = 0;

    my @filesWithExtras;
    my @filenamesCreated;

    for( my $isubdir = 0; $isubdir < scalar(@$refSubDirs); $isubdir++)
    {
        # Simplify by copying the array of lines for this subdirectory into '@lines'
        my @lines = @{ $$refSubSectionLiness[$isubdir] };

        logprint "Working with subdir '$$refSubDirs[$isubdir]'";

        # Recall that wantToExtract hash indicates whether or not we want to
        # write one or many (i.e. extract them out) files to each subdirectory.
        if( $wantToExtract{$$refSubDirs[$isubdir]} ) 
        {
            logprint "Going to separate into separate files for subdir '$$refSubDirs[$isubdir]'";

            my $iline = 0;
            my %filenames;
        
            # Keep extracting out lines for files until we run out
            until( $iline >= scalar(@lines) )
            {
                my ($addedExtraLines, @fileLines) = 
                    getFileLines( $$refSubDirs[$isubdir], $iline, @lines);

                last if( scalar(@fileLines) == 1 && $fileLines[0] eq "end of data" );
        
                my $filename = getFilename( $$refSubDirs[$isubdir], @fileLines );
                my $fullname = "$destDir/$$refSubDirs[$isubdir]/$filename";

                push @filesWithExtras, "$$refSubDirs[$isubdir]/$filename" if $addedExtraLines;
        
                die "Duplicate filename '$fullname'" if $filenames{$filename};
                $filenames{$filename} = 1;

                writeArray( $fullname, @fileLines);        
                push @filenamesCreated, "$$refSubDirs[$isubdir]/$filename";
            }
        }
        else
        {
            logprint "Writing out just one file in subdir '$$refSubDirs[$isubdir]'";
            writeArray( "$destDir/$$refSubDirs[$isubdir]/allData.sql", @lines);
            push @filenamesCreated, "$$refSubDirs[$isubdir]/allData.sql";
        }

        $linesWritten += scalar( @lines );
    }    

    writeArray( "$destDir/bookKeeping/filesWithExtras", @filesWithExtras);
    writeArray( "$destDir/bookKeeping/orderedFiles", @filenamesCreated );

    return $linesWritten;
}


#######################################################################
# Documentation
#######################################################################

=pod

=head1 NAME

schemadump.pl - creates clearcase directory structure from datapump output

=head1 SYNOPSIS

schemadump.pl [options] -src=<srcFile> -dest=<destDir>

Options:

  -help              - brief help message
  -man               - Full documentation
  -src=srcFile       - File that stores the output of datapump (mandatory)
  -dest=destDir      - Directory to unpack to (mandatory)
  -log=logFile       - Log messages to this file

=head1 OPTIONS

=over 8

=item B<-help>

Print a brief help message and exits.

=item B<-man>

Prints the manual page and exits.

=item B<-src=srcFile>

Specify what file to extract out the clearcase files from.

=item B<-dest=destDir>

Specify where to unpack the clearcase files to.  This must be an
existing directory.

=item B<-log=logFile>

Specify a file to write log messages to.  Default is to just write
to the terminal.

=back

=head1 DESCRIPTION

This script is used to break down the output of datapump.  It is
designed for use with the release manager database.

=cut