Subversion Repositories DevTools

Rev

Rev 6387 | Blame | Compare with Previous | Last modification | View Log | RSS feed

########################################################################
# COPYRIGHT - VIX IP PTY LTD ("VIX"). ALL RIGHTS RESERVED.
#
# Module name   : jats.sh
# Module type   : Makefile system
# Compiler(s)   : n/a
# Environment(s): jats
#
# Description   : A class to Locate Files
#                 Supportes:
#                   Wilcards
#                   Exclusions
#                   Recursion
#
# Usage:
#               my $search = JatsLocateFiles::new('JatsLocateFiles');
#               my $search = JatsLocateFiles->new();
#                  $search->recurse ( );
#                  $search->filter_in ( );          -- Supports multiple arguments
#                  $search->filter_in_re ( );       -- Supports multiple arguments
#                  $search->filter_out ( );         -- Supports multiple arguments
#                  $search->filter_out_re ( );      -- Supports multiple arguments
#                  $search->base_dir ()
#                  $search->has_filter ( );
#                  $search->has_in_filter ( );
#                  $search->has_out_filter ( );
#                  $search->full_path ( );
#                  $search->dirs_only ( );
#                  $search->dirs_too();             # Dirs have '/' appended
#                  $search->search ( );
#                  $search->results ( );
#                  $search->set_list ( arrayRef );
#
#......................................................................#

require 5.006_001;
use strict;
use warnings;

package JatsLocateFiles;
use JatsError;
use File::Find;
use ArrayHashUtils;

#-------------------------------------------------------------------------------
# Function        : new
#
# Description     : Create a new instance of a searcher
#
# Inputs          : class           - Class Name
#                   options
#                   Hash of user options
#                   Useful keys are:
#                       'recurse'
#                       'base_dir'
#                       'full_path'
#                       'dirs_only'
#
# Returns         : A reference to the new object of the specified class
#
sub new {
    my $class = shift;
    my $self  = {};
    $self->{full_path} = 0;
    $self->{recurse}  = 0;
    $self->{dirs_only}  = 0;
    $self->{dirs_too} = 0;
    $self->{exclude}  = [];
    $self->{include}  = [];
    $self->{base_dir} = undef;
    $self->{search_list} = undef;
    $self->{results}  = [];
    bless ($self, $class);

    #
    #   Process user arguments.
    #   These are treated as options. Leading '--' is optional
    #
    foreach ( @_ )
    {
        my $opt = '--' . $_;
        $opt =~ s~^----~--~;
        $self->option ($opt) || Error( "JatsLocateFiles:new. Unknown initialiser: $_");
    }
    return $self;
}

#-------------------------------------------------------------------------------
# Function        : recurse
#                   filter_in
#                   filter_in_re
#                   filter_out
#                   filter_out_re
#                   base_dir
#                   has_filter
#                   has_in_filter;
#                   has_out_filter;
#                   results
#                   full_path
#                   dirs_only
#                   dirs_too                    - Include dirs in the search
#                   set_list
#
# Description     : Accessor functions
#
# Inputs          : class
#                   One argument (optional)
#
# Returns         : Current value of the data item
#
sub recurse
{
    my $self = shift;
    if (@_) { $self->{recurse} = shift }
    return $self->{recurse};
}

sub full_path
{
    my $self = shift;
    if (@_) { $self->{full_path} = shift }
    return $self->{full_path};
}

sub dirs_only
{
    my $self = shift;
    if (@_) { $self->{dirs_only} = shift }
    return $self->{dirs_only};
}

sub dirs_too
{
    my $self = shift;
    if (@_) { $self->{dirs_too} = shift }
    return $self->{dirs_too};
}

sub filter_in
{
    my $self = shift;
    foreach (@_) { push @{$self->{include}}, glob2pat( $_ ) };
    return $self->{include};
}

sub filter_in_re
{
    my $self = shift;
    foreach (@_) { push @{$self->{include}}, $_ };
    return $self->{include};
}

sub filter_out
{
    my $self = shift;
    foreach (@_) { push @{$self->{exclude}}, glob2pat( $_ ) };
    return $self->{exclude};
}

sub filter_out_re
{
    my $self = shift;
    foreach (@_) { push @{$self->{exclude}}, $_ };
    return $self->{exclude};
}

sub base_dir
{
    my $self = shift;
    if (@_) { $self->{base_dir} = shift }
    return $self->{base_dir};
}

sub has_filter
{
    my $self = shift;
    return ( ( @{$self->{include}} || @{$self->{exclude}} ) );
}

sub has_in_filter
{
    my $self = shift;
    return ( @{$self->{include}} );
}

sub has_out_filter
{
    my $self = shift;
    return ( @{$self->{exclude}} );
}

sub results
{
    my $self = shift;
    $self->{results} = \() unless ( exists $self->{results} );
    return wantarray ? @{$self->{results}} : 1 + $#{$self->{results}};
}

sub set_list
{
    my $self = shift;
    if (@_) { $self->{search_list} = shift};
    Error('JatsLocateFiles:set_list expects an ARRAY ref') unless (ref $self->{search_list} eq 'ARRAY' );

}

#-------------------------------------------------------------------------------
# Function        : option
#
# Description     : Function to simplify the processing of search arguments
#                   Given an argument this function will act on it or
#                   return false
#
# Inputs          : option          - One possible standard search option
#
# Returns         : True            - Option is a search option and its been
#                                     processed
#
sub option
{
    my ($self, $opt) = @_;
    my $result = 1;

    if ( $opt =~ m/^--Dir=(.+)/ ) {
        $self->base_dir ($1);

    } elsif ( $opt =~ m/^--Recurse/ ) {
        $self->recurse(1);

    } elsif ( $opt =~ m/^--NoRecurse/ ) {
        $self->recurse(0);

    } elsif ( $opt =~ m/^--NoFullPath/ ) {              # Default. Returned items relative to search base        
        $self->full_path(0);

    } elsif ( $opt =~ m/^--FullPath/ ) {                # Prepend base path to returned items
        $self->full_path(1);

    } elsif ( $opt =~ m/^--FileListOnly/ ) {            # Default. Return files only 
        $self->full_path(0);
        $self->dirs_only(0);

    } elsif ( $opt =~ m/^--DirListOnly/ ) {             # Return dirs that contain selected files
        $self->dirs_only(1);

    } elsif ( $opt =~ m/^--DirsToo/ ) {                 # Return files and directories
        $self->dirs_too(1);

    } elsif ( $opt =~ m/^--DirsOnly/ ) {                # Ignore non-directory elements    
        $self->dirs_only(2);
        $self->dirs_too(1);

    } elsif ( $opt =~ m/^--FilterIn=(.+)/ ) {
        $self->filter_in ( $1 );

    } elsif ( $opt =~ m/^--FilterInRe=(.+)/ ) {
        $self->filter_in_re ( $1 );

    } elsif ( $opt =~ m/^--FilterOut=(.+)/ ) {
        $self->filter_out ( $1 );

    } elsif ( $opt =~ m/^--FilterOutRe=(.+)/ ) {
        $self->filter_out_re ( $1 );

    } elsif ( $opt =~ m/^--SetList=(.+)/ ) {
        $self->set_list ( $1 );

    } else {
        $result = 0;

    }

    return $result;
}

#-------------------------------------------------------------------------------
# Function        : search
#
# Description     : This function performs the search for files as specified
#                   by the arguments already provided
#
# Inputs          : base_dir (Optional)
#
# Returns         : List of files that match the search criteria
#                   The base directory is not prepended
#                   The list is a simple list of file names
#

my @search_list;             # Must be global to avoid closure problems
my $search_len;
my $search_base_dir;
my $search_dirs_too;
my $search_no_files;

sub search
{
    my $self = shift;
    $self->{base_dir} = $_[0] if (defined $_[0] );
    $self->{results} = ();

    #
    #   Ensure user has provided enough info
    #
    Error ("JatsLocateFiles: No base directory provided") unless ( $self->{base_dir} || $self->{search_list} );

    @search_list = ();
    if ( $self->{base_dir} ) {
        #
        #   Clean up the user dir. Remove any trailing / as we will be adding it back
        #
        $self->{base_dir} =~ s~/*$~~g;

        #
        #   Init recursion information
        #   Needed to avoid closure interactions
        #
        $search_len = 1 + length( $self->{base_dir} );

        #
        #   Create a list of candidate files
        #   If we are recursing the subtree, then this is a little harder
        #   If we are not recursing then we can't simply glob the directory as
        #   not all files are processed.
        #
        #   Will end up with a list of files that don't include $dir
        #
        if ( -d $self->{base_dir} )
        {
            if ( $self->{recurse} )
            {
                $search_dirs_too = $self->{dirs_too};
                $search_base_dir = $self->{base_dir};
                $search_no_files = ($self->{dirs_only} == 2);
                sub find_file_wanted
                {
                    return if ( $search_no_files && ! -d $_ );              # skip if current is not a dir (assume file) and we only match dirs
                    return if ( !$search_dirs_too && -d $_ );               # skip if current is dir and we are not including dirs
                    return if ( $search_base_dir eq $File::Find::name );    # skip if current is base_dir as we dont include it
                    my $file = $File::Find::name;
                    $file .= '/' if ( -d $_ && ! $search_no_files);
                    push @search_list, substr($file, $search_len );
                }

                #
                #       Under Unix we need to follow symbolic links, but Perl's
                #       Find:find does not work with -follow under windows if the source
                #       path contains a drive letter.
                #
                #       Solution. Only use follow under non-windows systems.
                #                 Works as Windows does not have symlinks (yet).
                #
                my $follow_opt =  ($ENV{GBE_UNIX}  > 0);
            
                File::Find::find( {wanted => \&find_file_wanted, follow_fast => $follow_opt, follow_skip => 2 }, $self->{base_dir} );
            }
            else
            {
                local *DIR ;
                opendir DIR, $self->{base_dir} || die ("Cannot open $self->{base_dir}");
                foreach ( readdir( DIR ) )
                {
                    next if /^\Q.\E$/;
                    next if /^\Q..\E$/;
                    next if ( !$self->{dirs_too} && -d "$self->{base_dir}/$_" );
                    push @search_list, $_;
                }
                closedir DIR;
            }
        }
    }
    else
    {
        # User has provided the search list to be processed
        $self->{full_path} = 0;
        @search_list = @{$self->{search_list}};
    }


    my @result;
    if ( @{$self->{include}} || @{$self->{exclude}} )
    {
        #
        #   Filtering is present
        #   Apply the filterin rules and then the filter out rules
        #   If no filter-in rules, then assume that all files are allowed in and
        #   simply apply the filter-out rules.
        #
        my @patsin  = map { qr/$_/ } @{$self->{include}};
        my @patsout = map { qr/$_/ } @{$self->{exclude}};

    #    map { print "Include:$_\n"; } @{$self->{include}};
    #    map { print "Exclude:$_\n"; } @{$self->{exclude}};


        file:
        foreach my $rfile ( @search_list )
        {
            my $file = '/' . $rfile;
            if ( @{$self->{include}} )
            {
                my $in = 0;
                for my $pat (@patsin)
                {
                    if ( $file =~ /$pat/ )
                    {
                        $in = 1;
                        last;
                    }
                }
    #print "------- Not included $file\n" unless $in;
                next unless ( $in );
            }

            for my $pat (@patsout)
            {
    #print "------- REJECT $file :: $pat \n" if ( $file =~ /$pat/ );
                next file if ( $file =~ /$pat/ );
            }
            push @result, $rfile;
        }
    }
    else
    {
        @result = @search_list ;
    }
    @search_list = ();
    $self->{results} = [];

    #
    #   Reattach the base directory, if required
    #       full_path  : Prepend full path
    #       dirs_only  : return list of dirs that have files
    #   Extract dirs only
    #
    my %fileList;
    foreach  ( @result )
    {
        my $path;
        if ( $self->{full_path} ) {
            $path = $self->{base_dir} . '/' . $_;
        } else {
            $path = $_;
        }

        if ( $self->{dirs_only} == 1 ) {
            $path =~ s~/[^/]*$~~;
        }

        #
        #   Add to results - if not already present
        #   Dont use UniquePush - its slow over large lists
        #
        unless (exists $fileList{$path} ) {
            push( @{$self->{results}}, $path);
            $fileList{$path} = 1;
        }
    }
#DebugDumpData ("Search", $self);
    return @{$self->{results}};
}

#-------------------------------------------------------------------------------
# Function        : glob2pat
#
# Description     : Convert four shell wildcard characters into their equivalent
#                   regular expression; all other characters are quoted to
#                   render them literals.
#
# Inputs          : Shell style wildcard pattern
#
# Returns         : Perl RE
#

sub glob2pat
{
    my $globstr = shift;
    $globstr =~ s~^/~~;
    my %patmap = (
        '*' => '[^/]*',
        '?' => '[^/]',
        '[' => '[',
        ']' => ']',
        '-' => '-',
    );
    $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
    return '/' . $globstr . '$';
}

#------------------------------------------------------------------------------
1;