#! perl ######################################################################## # Copyright (C) 2007 ERG Limited, All rights reserved # # Module name : jats.sh # Module type : Makefile system # Compiler(s) : n/a # Environment(s): jats # # Description : A class to Locate Files # Supportes: # Wilcards # Exclusions # Recursion # # Usage: # my $search = JatsLocateFiles::new('JatsLocateFiles'); # my $search = JatsLocateFiles->new(); # $search->recurse ( ); # $search->filter_in ( ); # $search->filter_in_re ( ); # $search->filter_out ( ); # $search->filter_out_re ( ); # $search->base_dir () # $search->has_filter ( ); # $search->full_path ( ); # $search->dirs_only ( ); # $search->dirs_too(); # Dirs have '/' appended # $search->search ( ); # $search->results ( ); # #......................................................................# require 5.006_001; use strict; use warnings; package JatsLocateFiles; use JatsError; use File::Find; use ArrayHashUtils; #------------------------------------------------------------------------------- # Function : new # # Description : Create a new instance of a searcher # # Inputs : class - Class Name # options # Hash of user options # Useful keys are: # 'recurse' # 'base_dir' # 'full_path' # 'dirs_only' # # Returns : A reference to the new object of the specified class # sub new { my $class = shift; my $self = {}; $self->{full_path} = 0; $self->{recurse} = 0; $self->{dirs_only} = 0; $self->{dirs_too} = 0; $self->{exclude} = []; $self->{include} = []; $self->{base_dir} = undef; $self->{results} = []; bless ($self, $class); # # Process user arguments. # These are treated as options. Leading '--' is optional # foreach ( @_ ) { my $opt = '--' . $_; $opt =~ s~^----~--~; $self->option ($opt) || Error( "JatsLocateFiles:new. Unknown initialiser: $_"); } return $self; } #------------------------------------------------------------------------------- # Function : recurse # filter_in # filter_in_re # filter_out # filter_out_re # base_dir # has_filter # results # full_path # dirs_only # dirs_too - Include dirs in the search # # Description : Accessor functions # # Inputs : class # One argument (optional) # # Returns : Current value of the data item # sub recurse { my $self = shift; if (@_) { $self->{recurse} = shift } return $self->{recurse}; } sub full_path { my $self = shift; if (@_) { $self->{full_path} = shift } return $self->{full_path}; } sub dirs_only { my $self = shift; if (@_) { $self->{dirs_only} = shift } return $self->{dirs_only}; } sub dirs_too { my $self = shift; if (@_) { $self->{dirs_too} = shift } return $self->{dirs_too}; } sub filter_in { my $self = shift; if (@_) { push @{$self->{include}}, glob2pat( shift ) } return $self->{include}; } sub filter_in_re { my $self = shift; if (@_) { push @{$self->{include}}, shift } return $self->{include}; } sub filter_out { my $self = shift; if (@_) { push @{$self->{exclude}}, glob2pat( shift ) } return $self->{exclude}; } sub filter_out_re { my $self = shift; if (@_) { push @{$self->{exclude}}, shift } return $self->{exclude}; } sub base_dir { my $self = shift; if (@_) { $self->{base_dir} = shift } return $self->{base_dir}; } sub has_filter { my $self = shift; return ( ( @{$self->{include}} || @{$self->{exclude}} ) ); } sub results { my $self = shift; $self->{results} = \() unless ( exists $self->{results} ); return wantarray ? @{$self->{results}} : 1 + $#{$self->{results}}; } #------------------------------------------------------------------------------- # Function : option # # Description : Function to simplify the processing of search arguments # Given an argument this function will act on it or # return false # # Inputs : option - One possible standard search option # # Returns : True - Option is a search option and its been # processed # sub option { my ($self, $opt) = @_; my $result = 1; if ( $opt =~ m/^--Dir=(.+)/ ) { $self->base_dir ($1); } elsif ( $opt =~ m/^--Recurse/ ) { $self->recurse(1); } elsif ( $opt =~ m/^--NoRecurse/ ) { $self->recurse(0); } elsif ( $opt =~ m/^--NoFullPath/ ) { # Default. Returned items relative to search base $self->full_path(0); } elsif ( $opt =~ m/^--FullPath/ ) { # Prepend base path to returned items $self->full_path(1); } elsif ( $opt =~ m/^--FileListOnly/ ) { # Default. Return files only $self->full_path(0); $self->dirs_only(0); } elsif ( $opt =~ m/^--DirListOnly/ ) { # Return dirs that contain selected files $self->dirs_only(1); } elsif ( $opt =~ m/^--DirsToo/ ) { # Return files and directories $self->dirs_too(1); } elsif ( $opt =~ m/^--DirsOnly/ ) { # Ignore non-directory elements $self->dirs_only(2); $self->dirs_too(1); } elsif ( $opt =~ m/^--FilterIn=(.+)/ ) { $self->filter_in ( $1 ); } elsif ( $opt =~ m/^--FilterInRe=(.+)/ ) { $self->filter_in_re ( $1 ); } elsif ( $opt =~ m/^--FilterOut=(.+)/ ) { $self->filter_out ( $1 ); } elsif ( $opt =~ m/^--FilterOutRe=(.+)/ ) { $self->filter_out_re ( $1 ); } else { $result = 0; } return $result; } #------------------------------------------------------------------------------- # Function : search # # Description : This function performs the search for files as specified # by the arguments already provided # # Inputs : base_dir (Optional) # # Returns : List of files that match the search criteria # The base directory is not prepended # The list is a simple list of file names # my @search_list; # Must be global to avoid closure problems my $search_len; my $search_base_dir; my $search_dirs_too; my $search_no_files; sub search { my $self = shift; $self->{base_dir} = $_[0] if (defined $_[0] ); $self->{results} = (); # # Ensure user has provided enough info # Error ("JatsLocateFiles: No base directory provided") unless ( $self->{base_dir} ); # # Clean up the user dir. Remove any trailing / as we will be adding it back # $self->{base_dir} =~ s~/*$~~g; # # Init recursion information # Needed to avoid closure interactions # @search_list = (); $search_len = 1 + length( $self->{base_dir} ); # # Create a list of candidate files # If we are recursing the subtree, then this is a little harder # If we are not recursing then we can't simply glob the directory as # not all files are processed. # # Will end up with a list of files that don't include $dir # if ( -d $self->{base_dir} ) { if ( $self->{recurse} ) { $search_dirs_too = $self->{dirs_too}; $search_base_dir = $self->{base_dir}; $search_no_files = ($self->{dirs_only} == 2); sub find_file_wanted { return if ( $search_no_files && ! -d $_ ); # skip if current is not a dir (assume file) and we only match dirs return if ( !$search_dirs_too && -d $_ ); # skip if current is dir and we are not including dirs return if ( $search_base_dir eq $File::Find::name ); # skip if current is base_dir as we dont include it my $file = $File::Find::name; $file .= '/' if ( -d $_ && ! $search_no_files); push @search_list, substr($file, $search_len ); } # # Under Unix we need to follow symbolic links, but Perl's # Find:find does not work with -follow under windows if the source # path contains a drive letter. # # Solution. Only use follow under non-windows systems. # Works as Windows does not have symlinks (yet). # my $follow_opt = ($ENV{GBE_UNIX} > 0); File::Find::find( {wanted => \&find_file_wanted, follow_fast => $follow_opt, follow_skip => 2 }, $self->{base_dir} ); } else { local *DIR ; opendir DIR, $self->{base_dir} || die ("Cannot open $self->{base_dir}"); foreach ( readdir( DIR ) ) { next if /^\Q.\E$/; next if /^\Q..\E$/; next if ( !$self->{dirs_too} && -d "$self->{base_dir}/$_" ); push @search_list, $_; } closedir DIR; } } my @result; if ( @{$self->{include}} || @{$self->{exclude}} ) { # # Filtering is present # Apply the filterin rules and then the filter out rules # If no filter-in rules, then assume that all files are allowed in and # simply apply the filter-out rules. # my @patsin = map { qr/$_/ } @{$self->{include}}; my @patsout = map { qr/$_/ } @{$self->{exclude}}; # map { print "Include:$_\n"; } @{$self->{include}}; # map { print "Exclude:$_\n"; } @{$self->{exclude}}; file: foreach my $rfile ( @search_list ) { my $file = '/' . $rfile; if ( @{$self->{include}} ) { my $in = 0; for my $pat (@patsin) { if ( $file =~ /$pat/ ) { $in = 1; last; } } #print "------- Not included $file\n" unless $in; next unless ( $in ); } for my $pat (@patsout) { #print "------- REJECT $file :: $pat \n" if ( $file =~ /$pat/ ); next file if ( $file =~ /$pat/ ); } push @result, $rfile; } } else { @result = @search_list ; } @search_list = (); $self->{results} = [];; # # Reattach the base directory, if required # full_path : Prepend full path # dirs_only : return list of dirs that have files # Extract dirs only # foreach ( @result ) { my $path; if ( $self->{full_path} ) { $path = $self->{base_dir} . '/' . $_; } else { $path = $_; } if ( $self->{dirs_only} == 1 ) { $path =~ s~/[^/]*$~~; } UniquePush( $self->{results}, $path); } #DebugDumpData ("Search", $self); return @{$self->{results}}; } #------------------------------------------------------------------------------- # Function : glob2pat # # Description : Convert four shell wildcard characters into their equivalent # regular expression; all other characters are quoted to # render them literals. # # Inputs : Shell style wildcard pattern # # Returns : Perl RE # sub glob2pat { my $globstr = shift; $globstr =~ s~^/~~; my %patmap = ( '*' => '[^/]*', '?' => '[^/]', '[' => '[', ']' => ']', '-' => '-', ); $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge; return '/' . $globstr . '$'; } #------------------------------------------------------------------------------ 1;