Subversion Repositories DevTools

Rev

Rev 4094 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#! perl
########################################################################
# Copyright (C) 2007 ERG Limited, All rights reserved
#
# Module name   : jats.sh
# Module type   : Makefile system
# Compiler(s)   : n/a
# Environment(s): jats
#
# Description   : A class to Locate Files
#                 Supportes:
#                   Wilcards
#                   Exclusions
#                   Recursion
#
# Usage:
#               my $search = JatsLocateFiles::new('JatsLocateFiles');
#               my $search = JatsLocateFiles->new();
#                  $search->recurse ( );
#                  $search->filter_in ( );
#                  $search->filter_in_re ( );
#                  $search->filter_out ( );
#                  $search->filter_out_re ( );
#                  $search->base_dir ()
#                  $search->has_filter ( );
#                  $search->full_path ( );
#                  $search->dirs_only ( );
#                  $search->dirs_too();             # Dirs have '/' appended
#                  $search->search ( );
#                  $search->results ( );
#
#......................................................................#

require 5.006_001;
use strict;
use warnings;

package JatsLocateFiles;
use JatsError;
use File::Find;
use ArrayHashUtils;

#-------------------------------------------------------------------------------
# Function        : new
#
# Description     : Create a new instance of a searcher
#
# Inputs          : class           - Class Name
#                   options
#                   Hash of user options
#                   Useful keys are:
#                       'recurse'
#                       'base_dir'
#                       'full_path'
#                       'dirs_only'
#
# Returns         : A reference to the new object of the specified class
#
sub new {
    my $class = shift;
    my $self  = {};
    $self->{full_path} = 0;
    $self->{recurse}  = 0;
    $self->{dirs_only}  = 0;
    $self->{dirs_too} = 0;
    $self->{exclude}  = [];
    $self->{include}  = [];
    $self->{base_dir} = undef;
    $self->{results}  = [];
    bless ($self, $class);

    #
    #   Process user arguments.
    #   These are treated as options. Leading '--' is optional
    #
    foreach ( @_ )
    {
        my $opt = '--' . $_;
        $opt =~ s~^----~--~;
        $self->option ($opt) || Error( "JatsLocateFiles:new. Unknown initialiser: $_");
    }
    return $self;
}

#-------------------------------------------------------------------------------
# Function        : recurse
#                   filter_in
#                   filter_in_re
#                   filter_out
#                   filter_out_re
#                   base_dir
#                   has_filter
#                   results
#                   full_path
#                   dirs_only
#                   dirs_too                    - Include dirs in the search
#
# Description     : Accessor functions
#
# Inputs          : class
#                   One argument (optional)
#
# Returns         : Current value of the data item
#
sub recurse
{
    my $self = shift;
    if (@_) { $self->{recurse} = shift }
    return $self->{recurse};
}

sub full_path
{
    my $self = shift;
    if (@_) { $self->{full_path} = shift }
    return $self->{full_path};
}

sub dirs_only
{
    my $self = shift;
    if (@_) { $self->{dirs_only} = shift }
    return $self->{dirs_only};
}

sub dirs_too
{
    my $self = shift;
    if (@_) { $self->{dirs_too} = shift }
    return $self->{dirs_too};
}

sub filter_in
{
    my $self = shift;
    if (@_) { push @{$self->{include}}, glob2pat( shift ) }
    return $self->{include};
}

sub filter_in_re
{
    my $self = shift;
    if (@_) { push @{$self->{include}}, shift }
    return $self->{include};
}

sub filter_out
{
    my $self = shift;
    if (@_) { push @{$self->{exclude}}, glob2pat( shift ) }
    return $self->{exclude};
}

sub filter_out_re
{
    my $self = shift;
    if (@_) { push @{$self->{exclude}}, shift }
    return $self->{exclude};
}

sub base_dir
{
    my $self = shift;
    if (@_) { $self->{base_dir} = shift }
    return $self->{base_dir};
}

sub has_filter
{
    my $self = shift;
    return ( ( @{$self->{include}} || @{$self->{exclude}} ) );
}

sub results
{
    my $self = shift;
    $self->{results} = \() unless ( exists $self->{results} );
    return wantarray ? @{$self->{results}} : 1 + $#{$self->{results}};
}

#-------------------------------------------------------------------------------
# Function        : option
#
# Description     : Function to simplify the processing of search arguments
#                   Given an argument this function will act on it or
#                   return false
#
# Inputs          : option          - One possible standard search option
#
# Returns         : True            - Option is a search option and its been
#                                     processed
#
sub option
{
    my ($self, $opt) = @_;
    my $result = 1;

    if ( $opt =~ m/^--Dir=(.+)/ ) {
        $self->base_dir ($1);

    } elsif ( $opt =~ m/^--Recurse/ ) {
        $self->recurse(1);

    } elsif ( $opt =~ m/^--NoRecurse/ ) {
        $self->recurse(0);

    } elsif ( $opt =~ m/^--NoFullPath/ ) {              # Default. Returned items relative to search base        
        $self->full_path(0);

    } elsif ( $opt =~ m/^--FullPath/ ) {                # Prepend base path to returned items
        $self->full_path(1);

    } elsif ( $opt =~ m/^--FileListOnly/ ) {            # Default. Return files only 
        $self->full_path(0);
        $self->dirs_only(0);

    } elsif ( $opt =~ m/^--DirListOnly/ ) {             # Return dirs that contain selected files
        $self->dirs_only(1);

    } elsif ( $opt =~ m/^--DirsToo/ ) {                 # Return files and directories
        $self->dirs_too(1);

    } elsif ( $opt =~ m/^--DirsOnly/ ) {                # Ignore non-directory elements    
        $self->dirs_only(2);
        $self->dirs_too(1);

    } elsif ( $opt =~ m/^--FilterIn=(.+)/ ) {
        $self->filter_in ( $1 );

    } elsif ( $opt =~ m/^--FilterInRe=(.+)/ ) {
        $self->filter_in_re ( $1 );

    } elsif ( $opt =~ m/^--FilterOut=(.+)/ ) {
        $self->filter_out ( $1 );

    } elsif ( $opt =~ m/^--FilterOutRe=(.+)/ ) {
        $self->filter_out_re ( $1 );

    } else {
        $result = 0;

    }

    return $result;
}

#-------------------------------------------------------------------------------
# Function        : search
#
# Description     : This function performs the search for files as specified
#                   by the arguments already provided
#
# Inputs          : base_dir (Optional)
#
# Returns         : List of files that match the search criteria
#                   The base directory is not prepended
#                   The list is a simple list of file names
#

my @search_list;             # Must be global to avoid closure problems
my $search_len;
my $search_base_dir;
my $search_dirs_too;
my $search_no_files;

sub search
{
    my $self = shift;
    $self->{base_dir} = $_[0] if (defined $_[0] );
    $self->{results} = ();

    #
    #   Ensure user has provided enough info
    #
    Error ("JatsLocateFiles: No base directory provided") unless ( $self->{base_dir} );

    #
    #   Clean up the user dir. Remove any trailing / as we will be adding it back
    #
    $self->{base_dir} =~ s~/*$~~g;

    #
    #   Init recursion information
    #   Needed to avoid closure interactions
    #
    @search_list = ();
    $search_len = 1 + length( $self->{base_dir} );

    #
    #   Create a list of candidate files
    #   If we are recursing the subtree, then this is a little harder
    #   If we are not recursing then we can't simply glob the directory as
    #   not all files are processed.
    #
    #   Will end up with a list of files that don't include $dir
    #
    if ( -d $self->{base_dir} )
    {
        if ( $self->{recurse} )
        {
            $search_dirs_too = $self->{dirs_too};
            $search_base_dir = $self->{base_dir};
            $search_no_files = ($self->{dirs_only} == 2);
            sub find_file_wanted
            {
                return if ( $search_no_files && ! -d $_ );              # skip if current is not a dir (assume file) and we only match dirs
                return if ( !$search_dirs_too && -d $_ );               # skip if current is dir and we are not including dirs
                return if ( $search_base_dir eq $File::Find::name );    # skip if current is base_dir as we dont include it
                my $file = $File::Find::name;
                $file .= '/' if ( -d $_ && ! $search_no_files);
                push @search_list, substr($file, $search_len );
            }

            #
            #       Under Unix we need to follow symbolic links, but Perl's
            #       Find:find does not work with -follow under windows if the source
            #       path contains a drive letter.
            #
            #       Solution. Only use follow under non-windows systems.
            #                 Works as Windows does not have symlinks (yet).
            #
            my $follow_opt =  ($ENV{GBE_UNIX}  > 0);
        
            File::Find::find( {wanted => \&find_file_wanted, follow_fast => $follow_opt, follow_skip => 2 }, $self->{base_dir} );
        }
        else
        {
            local *DIR ;
            opendir DIR, $self->{base_dir} || die ("Cannot open $self->{base_dir}");
            foreach ( readdir( DIR ) )
            {
                next if /^\Q.\E$/;
                next if /^\Q..\E$/;
                next if ( !$self->{dirs_too} && -d "$self->{base_dir}/$_" );
                push @search_list, $_;
            }
            closedir DIR;
        }
    }


    my @result;
    if ( @{$self->{include}} || @{$self->{exclude}} )
    {
        #
        #   Filtering is present
        #   Apply the filterin rules and then the filter out rules
        #   If no filter-in rules, then assume that all files are allowed in and
        #   simply apply the filter-out rules.
        #
        my @patsin  = map { qr/$_/ } @{$self->{include}};
        my @patsout = map { qr/$_/ } @{$self->{exclude}};

    #    map { print "Include:$_\n"; } @{$self->{include}};
    #    map { print "Exclude:$_\n"; } @{$self->{exclude}};


        file:
        foreach my $rfile ( @search_list )
        {
            my $file = '/' . $rfile;
            if ( @{$self->{include}} )
            {
                my $in = 0;
                for my $pat (@patsin)
                {
                    if ( $file =~ /$pat/ )
                    {
                        $in = 1;
                        last;
                    }
                }
    #print "------- Not included $file\n" unless $in;
                next unless ( $in );
            }

            for my $pat (@patsout)
            {
    #print "------- REJECT $file :: $pat \n" if ( $file =~ /$pat/ );
                next file if ( $file =~ /$pat/ );
            }

            push @result, $rfile;
        }
    }
    else
    {
        @result = @search_list ;
    }
    @search_list = ();
    $self->{results} = [];;

    #
    #   Reattach the base directory, if required
    #       full_path  : Prepend full path
    #       dirs_only  : return list of dirs that have files
    #   Extract dirs only
    #
    foreach  ( @result )
    {
        my $path;
        if ( $self->{full_path} )
        {
            $path = $self->{base_dir} . '/' . $_;
        }
        else
        {
            $path = $_;
        }

        if ( $self->{dirs_only} == 1 )
        {
            $path =~ s~/[^/]*$~~;
        }

        UniquePush( $self->{results}, $path);
    }
#DebugDumpData ("Search", $self);
    return @{$self->{results}};
}

#-------------------------------------------------------------------------------
# Function        : glob2pat
#
# Description     : Convert four shell wildcard characters into their equivalent
#                   regular expression; all other characters are quoted to
#                   render them literals.
#
# Inputs          : Shell style wildcard pattern
#
# Returns         : Perl RE
#

sub glob2pat
{
    my $globstr = shift;
    $globstr =~ s~^/~~;
    my %patmap = (
        '*' => '[^/]*',
        '?' => '[^/]',
        '[' => '[',
        ']' => ']',
        '-' => '-',
    );
    $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
    return '/' . $globstr . '$';
}

#------------------------------------------------------------------------------
1;