Subversion Repositories DevTools

Rev

Rev 369 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
229 dpurdie 1
#!/usr/bin/env perl
2
# cloc -- Count Lines of Code {{{1
3
# Copyright (C) 2006 Northrop Grumman Corporation
4
# Author:  Al Danial <al.danial@gmail.com>
5
#          First release August 2006
6
#
7
# Includes code from:
8
#   - SLOCCount v2.26 
9
#     http://www.dwheeler.com/sloccount/
10
#     by David Wheeler.
11
#   - Regexp::Common v2.120
12
#     http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm
13
#     by Damian Conway and Abigail
14
#
15
# This program is free software; you can redistribute it and/or modify
16
# it under the terms of the GNU General Public License as published by
17
# the Free Software Foundation; either version 2 of the License, or
18
# (at your option) any later version.
19
#
20
# This program is distributed in the hope that it will be useful,
21
# but WITHOUT ANY WARRANTY; without even the implied warranty of
22
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
# GNU General Public License for more details:
24
# http://www.gnu.org/licenses/gpl.txt
25
#
26
# You should have received a copy of the GNU General Public License
27
# along with this program; if not, write to the Free Software
28
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29
# 1}}}
30
my $VERSION = 1.00;
31
require 5.006;
32
# use modules                                  {{{1
33
use warnings;
34
use strict;
35
use Getopt::Long;
36
use File::Basename;
37
use File::Temp qw { tempfile tempdir };
38
use File::Find;
39
use File::Path;
40
use IO::File;
41
 
42
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
43
my $HAVE_Digest_MD5 = 0;
44
eval "use Digest::MD5;";
45
if (defined $Digest::MD5::VERSION) {
46
    $HAVE_Digest_MD5 = 1;
47
} else {
48
    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
49
}
50
 
51
my $HAVE_Rexexp_Common;
52
# Regexp::Common also isn't in the standard distribution.  It will
53
# be installed in a temp directory if necessary.
54
BEGIN {
55
    if (eval "use Regexp::Common;") {
56
        $HAVE_Rexexp_Common = 1;
57
    } else {
58
        $HAVE_Rexexp_Common = 0;
59
    }
60
}
61
 
62
# Uncomment next two lines when building Windows executable with perl2exe
63
# or if running on a system that already has Regexp::Common.
64
#use Regexp::Common;
65
#$HAVE_Rexexp_Common = 1;
66
 
67
#perl2exe_include "Regexp/Common/whitespace.pm"
68
#perl2exe_include "Regexp/Common/URI.pm"
69
#perl2exe_include "Regexp/Common/URI/fax.pm"
70
#perl2exe_include "Regexp/Common/URI/file.pm"
71
#perl2exe_include "Regexp/Common/URI/ftp.pm"
72
#perl2exe_include "Regexp/Common/URI/gopher.pm"
73
#perl2exe_include "Regexp/Common/URI/http.pm"
74
#perl2exe_include "Regexp/Common/URI/pop.pm"
75
#perl2exe_include "Regexp/Common/URI/prospero.pm"
76
#perl2exe_include "Regexp/Common/URI/news.pm"
77
#perl2exe_include "Regexp/Common/URI/tel.pm"
78
#perl2exe_include "Regexp/Common/URI/telnet.pm"
79
#perl2exe_include "Regexp/Common/URI/tv.pm"
80
#perl2exe_include "Regexp/Common/URI/wais.pm"
81
#perl2exe_include "Regexp/Common/CC.pm"
82
#perl2exe_include "Regexp/Common/SEN.pm"
83
#perl2exe_include "Regexp/Common/number.pm"
84
#perl2exe_include "Regexp/Common/delimited.pm"
85
#perl2exe_include "Regexp/Common/profanity.pm"
86
#perl2exe_include "Regexp/Common/net.pm"
87
#perl2exe_include "Regexp/Common/zip.pm"
88
#perl2exe_include "Regexp/Common/comment.pm"
89
#perl2exe_include "Regexp/Common/balanced.pm"
90
#perl2exe_include "Regexp/Common/lingua.pm"
91
#perl2exe_include "Regexp/Common/list.pm"
92
#perl2exe_include "File/Glob.pm"
93
 
94
use Text::Tabs qw { expand };
95
#use Data::Dumper::Simple;
96
#use Data::Dumper;
97
use Cwd qw { cwd };
98
# 1}}}
99
# Usage information, options processing.       {{{1
100
my $script = basename $0;
101
my $usage  = "
102
Usage: $script [options] <file(s)/dir(s)> | <report files>
103
 
104
 Count physical lines of source code in the given files and/or
105
 recursively below the given directories.
106
 
107
 Options:
108
   --by-file                 Report results for every source file encountered
109
                             in addition to reporting by language.
110
   --categorized=<file>      Save names of categorized files to <file>.
111
   --counted=<file>          Save names of processed source files to <file>.
112
   --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
113
                             D1, D2, D3, et cetera, from being scanned.  For
114
                             example  --exclude-dir=.cvs,.svn  will skip
115
                             all files that have /.cvs/ or /.svn/ as part of 
116
                             their path.
117
   --exclude-lang=<L1>[,L2,] Exclude the given comma separated languages
118
                             L1, L2, L3, et cetera, from being counted.
119
   --extract-with=<cmd>      Use <cmd> to extract binary archive files (e.g.:
120
                             .tar.gz, .zip, .Z).  Use the literal '>FILE<' as 
121
                             a stand-in for the actual file(s) to be
122
                             extracted.  For example, to count lines of code
123
                             in the input files 
124
                                gcc-4.2.tar.gz  perl-5.8.8.tar.gz  
125
                             on Unix use  
126
                               --extract-with='gzip -dc >FILE< | tar xfv -'
127
                             and on Windows use: 
128
                               --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
129
                             (if you have WinZip installed there).
130
   --force-lang=<lang>,<ext> Process all files that have a <ext> extension 
131
                             with the counter for language <lang>.  For 
132
                             example, to count all .f files with the 
133
                             Fortran 90 counter (which expects files to 
134
                             end with .f90) instead of the default Fortran 77 
135
                             counter, use
136
                               --force-lang=\"Fortran 90\",f
137
                             The language name is case insensitive.  This 
138
                             option can be specified multiple times.
139
   --found=<file>            Save names of every file found to <file>.
140
   --ignored=<file>          Save names of ignored files and the reason they
141
                             were ignored to <file>.
142
   --no3                     Suppress third-generation language output.
143
                             This option can cause report summation to fail
144
                             if some reports were produced with this option
145
                             while others were produced without it.
146
   --print-filter-stages     Print to STDOUT processed source code before and 
147
                             after each filter is applied.
148
   --progress-rate=<n>       Show progress update after every <n> files are
149
                             processed (default <n>=100).
150
   --quiet                   Suppress all information messages except for
151
                             the final report.
152
   --report-file=<file>      Write the results to <file> instead of STDOUT.
153
   --read-lang-def=<file>    Load from <file> the language processing filters.
154
                             (see also --write-lang-def) then use these filters
155
                             instead of the built-in filters.
156
   --sdir=<dir>              Use <dir> as the scratch directory instead of
157
                             letting File::Temp chose the location.  Files
158
                             written to this location are not removed at
159
                             the end of the run (as they are with File::Temp).
160
   --show-ext[=<ext>]        Print information about all known (or just the
161
                             given) file extensions and exit.
162
   --show-lang[=<lang>]      Print information about all known (or just the
163
                             given) languages and exit.
164
   --strip-comments=<ext>    For each file processed, write to the current
165
                             directory a version of the file which has blank
166
                             lines and comments removed.  The name of each
167
                             stripped file is the original file name with 
168
                             .<ext> appended to it.
169
   --sum-reports             Input arguments are report files previously
170
                             created with the --report-file option.  Makes
171
                             a cumulative set of results containing the
172
                             sum of data from the individual report files.
173
   --write-lang-def=<file>   Writes to <file> the language processing filters
174
                             then exits.  Useful as a first step to creating
175
                             custom language definitions (see --read-lang-def).
176
   -v[=<n>]                  Verbose switch (optional numeric value).
177
   --version                 Print the version of this program and exit.
178
   --csv                     Write the results as comma separated values.
179
   --xml                     Write the results in XML.
180
   --yaml                    Write the results in YAML.
181
 
182
";
183
$| = 1;  # flush STDOUT
184
my $start_time = time();
185
my (
186
    $opt_categorized          ,
187
    $opt_found                ,
188
    @opt_force_lang           ,
189
    $opt_ignored              ,
190
    $opt_counted              ,
191
    $opt_show_ext             ,
192
    $opt_show_lang            ,
193
    $opt_progress_rate        ,
194
    $opt_print_filter_stages  ,
195
    $opt_v                    ,
196
    $opt_version              ,
197
    $opt_exclude_lang         ,
198
    $opt_exclude_dir          ,
199
    $opt_read_lang_def        ,
200
    $opt_write_lang_def       ,
201
    $opt_strip_comments       ,
202
    $opt_quiet                ,
203
    $opt_report_file          ,
204
    $opt_sdir                 ,
205
    $opt_sum_reports          ,
206
    $opt_no3                  ,
207
    $opt_extract_with         ,
208
    $opt_by_file              ,
209
    $opt_xml                  ,
210
    $opt_yaml                 ,
211
    $opt_csv                  ,
212
   );
213
GetOptions(
214
           "by_file"             => \$opt_by_file             ,
215
           "by-file"             => \$opt_by_file             ,
216
           "categorized=s"       => \$opt_categorized         ,
217
           "counted=s"           => \$opt_counted             ,
218
           "exclude_lang=s"      => \$opt_exclude_lang        ,
219
           "exclude-lang=s"      => \$opt_exclude_lang        ,
220
           "exclude_dir=s"       => \$opt_exclude_dir         ,
221
           "exclude-dir=s"       => \$opt_exclude_dir         ,
222
           "extract_with=s"      => \$opt_extract_with        , 
223
           "extract-with=s"      => \$opt_extract_with        , 
224
           "found=s"             => \$opt_found               ,
225
           "ignored=s"           => \$opt_ignored             ,
226
           "quiet"               => \$opt_quiet               ,
227
           "read_lang_def=s"     => \$opt_read_lang_def       ,
228
           "read-lang-def=s"     => \$opt_read_lang_def       ,
229
           "show_ext:s"          => \$opt_show_ext            ,
230
           "show-ext:s"          => \$opt_show_ext            ,
231
           "show_lang:s"         => \$opt_show_lang           ,
232
           "show-lang:s"         => \$opt_show_lang           ,
233
           "progress_rate=i"     => \$opt_progress_rate       ,
234
           "progress-rate=i"     => \$opt_progress_rate       ,
235
           "print_filter_stages" => \$opt_print_filter_stages ,
236
           "print-filter-stages" => \$opt_print_filter_stages ,
237
           "report_file=s"       => \$opt_report_file         ,
238
           "report-file=s"       => \$opt_report_file         ,
239
           "sdir=s"              => \$opt_sdir                ,
240
           "strip_comments=s"    => \$opt_strip_comments      ,
241
           "strip-comments=s"    => \$opt_strip_comments      ,
242
           "sum_reports"         => \$opt_sum_reports         ,
243
           "sum-reports"         => \$opt_sum_reports         ,
244
           "no3"                 => \$opt_no3                 ,
245
           "v:i"                 => \$opt_v                   ,
246
           "version"             => \$opt_version             ,
247
           "write_lang_def=s"    => \$opt_write_lang_def      ,
248
           "write-lang-def=s"    => \$opt_write_lang_def      ,
249
           "xml"                 => \$opt_xml                 ,
250
           "force_lang=s"        => \@opt_force_lang          ,
251
           "force-lang=s"        => \@opt_force_lang          ,
252
           "yaml"                => \$opt_yaml                ,
253
           "csv"                 => \$opt_csv                 ,
254
          );
255
 
256
my %Exclude_Language = ();
257
   %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang) 
258
        if $opt_exclude_lang;
259
my %Exclude_Dir      = ();
260
   %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir ) 
261
        if $opt_exclude_dir ;
262
# Options defaults:
263
$opt_progress_rate = 100 unless $opt_progress_rate;
264
$opt_v             =   0 unless $opt_v;
265
die $usage unless defined $opt_version         or
266
                  defined $opt_show_lang       or
267
                  defined $opt_show_ext        or
268
                  defined $opt_write_lang_def  or
269
                  scalar @ARGV >= 1;
270
# 1}}}
271
# Step 1:  Initialize global constants.        {{{1
272
#
273
my $ON_WINDOWS = 0;
274
   $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
275
   $ON_WINDOWS = 0 if $ENV{'SHELL'};  # make Cygwin look like Unix
276
 
277
my $nFiles_Found = 0;  # updated in make_file_list
278
my (%Language_by_Extension, %Language_by_Script,
279
    %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
280
    %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
281
   );
282
my %Error_Codes = ( 'Unable to read'              => -1,
283
                    'Neither file nor directory'  => -2, );
284
if ($opt_read_lang_def) {
285
    read_lang_def(
286
        $opt_read_lang_def     , #        Sample values:
287
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
288
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
289
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
290
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
291
                                 #      [ 'remove_matches' , '^\s*#'  ]
292
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
293
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
294
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
295
        );
296
} else {
297
    set_constants(               #
298
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
299
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
300
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
301
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
302
                                 #      [ 'remove_matches' , '^\s*#'  ]
303
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
304
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
305
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
306
        \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
307
        );
308
}
309
 
310
# Process command line provided extention-to-language mapping overrides.
311
# Make a hash of known languages in lower case for easier matching.
312
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
313
foreach my $language (keys %Filters_by_Language) {
314
    my $lang_lc = lc $language;
315
    $Recognized_Language_lc{$lang_lc} = $language;
316
}
317
my %Forced_Extension = (); # file name extensions which user wants to count
318
foreach my $pair (@opt_force_lang) {
319
    my ($lang, $extension) = split(',', $pair);
320
    $Forced_Extension{$extension} = $lang;
321
    my $lang_lc = lc $lang;
322
 
323
    die "Unknown language '$lang' used with --force-lang option.  " .
324
        "The command\n  $script --show-lang\n" .
325
        "will print all recognized languages.\n" 
326
        unless $Recognized_Language_lc{$lang_lc}; 
327
    $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
328
}
329
 
330
# 1}}}
331
# Step 2:  Early exits for display, summation. {{{1
332
#
333
if ($opt_version) {
334
    printf "%.2f\n", $VERSION;
335
    exit;
336
}
337
print_extension_info($opt_show_ext ) if defined $opt_show_ext ;
338
print_language_info( $opt_show_lang) if defined $opt_show_lang;
339
exit if (defined $opt_show_ext) or (defined $opt_show_lang);
340
 
341
# Windows doesn't expand wildcards.  Use code from Sean M. Burke's 
342
# Win32::Autoglob module to do this.
343
#print "Before glob have [", join(",", @ARGV), "]\n";
344
@ARGV = map {;
345
    ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
346
      } @ARGV if $ON_WINDOWS; 
347
;
348
#print "after  glob have [", join(",", @ARGV), "]\n";
349
 
350
if ($opt_sum_reports) {
351
    my %Results = ();
352
    foreach my $type( "by language", "by report file" ) {
353
        my $found_lang = combine_results(\@ARGV, 
354
                                          $type, 
355
                                         \%{$Results{ $type }}, 
356
                                         \%Filters_by_Language );
357
        next unless %Results;
358
        my $end_time = time();
359
        my @results  = generate_report($VERSION, $end_time - $start_time,
360
                                       $type,
361
                                      \%{$Results{ $type }}, \%Scale_Factor);
362
        if ($opt_report_file) {
363
            my $ext  = ".lang";
364
               $ext  = ".file" unless $type eq "by language";
365
            next if !$found_lang and  $ext  eq ".lang";
366
            write_file($opt_report_file . $ext, @results);
367
        } else {
368
            print "\n", join("\n", @results), "\n";
369
        }
370
    }
371
    exit;
372
}
373
if ($opt_write_lang_def) {
374
    write_lang_def($opt_write_lang_def   ,
375
                  \%Language_by_Extension,
376
                  \%Language_by_Script   ,
377
                  \%Language_by_File     ,
378
                  \%Filters_by_Language  ,
379
                  \%Not_Code_Extension   ,
380
                  \%Not_Code_Filename    ,
381
                  \%Scale_Factor         ,
382
                  );
383
    exit;
384
}
385
# 1}}}
386
# Step 3:  Create a list of files to consider. {{{1
387
#  a) If inputs are binary archives, first cd to a temp
388
#     directory, expand the archive with the user-given
389
#     extraction tool, then add the temp directory to
390
#     the list of dirs to process.
391
#  b) Create a list of every file that might contain source
392
#     code.  Ignore binary files, zero-sized files, and
393
#     any file in a directory the user says to exclude.
394
#  c) Determine the language for each file in the list.
395
#
396
my @binary_archive = ();
397
if ($opt_extract_with) {
398
    my $cwd = cwd();
399
#print "cwd main = [$cwd]\n";
400
    my @extract_location = ();
401
    foreach my $bin_file (@ARGV) {
402
        my $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
403
        chdir $extract_dir;
404
        print "Using temp dir [$extract_dir] to extract $bin_file\n" 
405
            if $opt_v;
406
        my $bin_file_full_path = "";
407
        if (File::Spec->file_name_is_absolute( $bin_file )) {
408
            $bin_file_full_path = $bin_file;
409
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
410
        } else {
411
            $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
412
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
413
        }
414
        (my $extract_cmd = $opt_extract_with ) 
415
            =~ s/>FILE</$bin_file_full_path/g;
416
        print  $extract_cmd, "\n";
417
        system $extract_cmd;
418
        push @extract_location, $extract_dir;
419
        chdir $cwd;
420
    }
421
    # It is possible that the binary archive itself contains additional
422
    # files compressed the same way (true for Java .ear files).  Go
423
    # through all the files that were extracted, see if they are binary
424
    # archives and try to extract them.  Lather, rinse, repeat.
425
    my $binary_archives_exist = 1;
426
    my $count_binary_archives = 0;
427
    my $previous_count        = 0;
428
    while ($binary_archives_exist) {
429
        @binary_archive = ();
430
        foreach my $dir (@extract_location) {
431
            find(\&archive_files, $dir);  # populates global @binary_archive
432
        }
433
        foreach my $archive (@binary_archive) {
434
            (my $extract_cmd = $opt_extract_with ) 
435
                =~ s/>FILE</$archive/g;
436
            my $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
437
            chdir  $extract_dir;
438
            print  $extract_cmd, "\n";
439
            system $extract_cmd;
440
            push @extract_location, $extract_dir;
441
            unlink $archive;  # otherwise will be extracting it forever 
442
        }
443
        $count_binary_archives = scalar @binary_archive;
444
        if ($count_binary_archives == $previous_count) {
445
            $binary_archives_exist = 0;
446
        }
447
        $previous_count = $count_binary_archives;
448
    }
449
    chdir $cwd;
450
 
451
    @ARGV = @extract_location;
452
}
453
my @Errors    = ();
454
my @file_list = ();  # global variable updated in files()
455
my %Ignored   = ();  # files that are not counted (language not recognized or
456
                     # problems reading the file)
457
my $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
458
#        make_file_list populates global variable @file_list via call to 
459
#        File::Find's find() which in turn calls files()
460
# 1}}}
461
# Step 4:  Remove duplicate files.             {{{1
462
#
463
my %Language           = ();
464
my %unique_source_file = ();
465
remove_duplicate_files($fh, \%Language   , \%unique_source_file, 
466
                            \%Error_Codes, \@Errors , \%Ignored);
467
printf "%8d unique file%s.                              \n", 
468
    plural_form(scalar keys %unique_source_file) 
469
    unless $opt_quiet;
470
# 1}}}
471
# Step 5:  Count code, comments, blank lines.  {{{1
472
#
473
 
474
my %Results_by_Language = ();
475
my %Results_by_File     = ();
476
my $nCounted = 0;
477
foreach my $file (sort keys %unique_source_file) {
478
    ++$nCounted;
479
    printf "Counting:  %d\r", $nCounted unless $nCounted % $opt_progress_rate;
480
    next if $Ignored{$file};
481
    if ($Exclude_Language{$Language{$file}}) {
482
        $Ignored{$file} = "--exclude_lang=$Language{$file}";
483
        next;
484
    }
485
    if (!defined @{$Filters_by_Language{$Language{$file}} }) {
486
        if ($Language{$file} eq "(unknown)") {
487
            $Ignored{$file} = "language unknown (#1)";
488
        } else {
489
            $Ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
490
        }
491
        next;
492
    }
493
 
494
    my ($all_line_count,
495
        $blank_count   ,
496
        $comment_count ,
497
       ) = call_counter($file, $Language{$file});
498
    my $code_count = $all_line_count - $blank_count - $comment_count;
499
    if ($opt_by_file) {
500
        $Results_by_File{$file}{'code'   } = $code_count     ;
501
        $Results_by_File{$file}{'blank'  } = $blank_count    ;
502
        $Results_by_File{$file}{'comment'} = $comment_count  ;
503
        $Results_by_File{$file}{'lang'   } = $Language{$file};
504
        $Results_by_File{$file}{'nFiles' } = 1;
505
    }
506
 
507
    ++$Results_by_Language{$Language{$file}}{'nFiles'};
508
    $Results_by_Language{$Language{$file}}{'code'}    += $code_count   ;
509
    $Results_by_Language{$Language{$file}}{'blank'}   += $blank_count  ;
510
    $Results_by_Language{$Language{$file}}{'comment'} += $comment_count;
511
}
512
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
513
write_file($opt_ignored, @ignored_reasons   ) if $opt_ignored;
514
write_file($opt_counted, sort keys %Language) if $opt_counted;
515
# 1}}}
516
# Step 6:  Print results.                      {{{1
517
#
518
my $end_time = time();
519
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) 
520
    unless $opt_quiet;
521
print_errors(\%Error_Codes, \@Errors) if @Errors;
522
exit unless %Results_by_Language;
523
 
524
#use YAML; print YAML::Dump(\%Results_by_Language); die;
525
 
526
my @results = ();
527
unless ($opt_by_file) {
528
    @results = generate_report( $VERSION, $end_time - $start_time,
529
                               "by language",
530
                               \%Results_by_Language, \%Scale_Factor);
531
    if ($opt_report_file) { write_file($opt_report_file, @results); } 
532
    else                  { print "\n", join("\n", @results), "\n"; }
533
} else {
534
    @results = generate_report( $VERSION, $end_time - $start_time,
535
                                "by file",
536
                                \%Results_by_File,    \%Scale_Factor);
537
    if ($opt_report_file) { write_file($opt_report_file, @results); } 
538
    else                  { print "\n", join("\n", @results), "\n"; }
539
}
540
# 1}}}
541
 
542
sub combine_results {                        # {{{1
543
    # returns 1 if the inputs are categorized by language
544
    #         0 if no identifiable language was found
545
    my ($ra_report_files, # in
546
        $report_type    , # in  "by language" or "by report file"
547
        $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
548
        $rhaa_Filters_by_Language , # in
549
       ) = @_;
550
 
551
    my $found_language = 0;
552
 
553
    foreach my $file (@{$ra_report_files}) {
554
        my $IN = new IO::File $file, "r";
555
        if (!defined $IN) {
556
            warn "Unable to read $file; ignoring.\n";
557
            next;
558
        }
559
        while (<$IN>) {
560
            next if /^(http|Language|SUM|-----)/;
561
            if (m{^(.*?)\s+         # language
562
                   (\d+)\s+         # files
563
                   (\d+)\s+         # blank
564
                   (\d+)\s+         # comments
565
                   (\d+)\s+         # code
566
                   (                #    next four entries missing with -nno3
567
                   x\s+             # x
568
                   \d+\.\d+\s+      # scale
569
                   =\s+             # =
570
                   (\d+\.\d+)\s*    # scaled code
571
                   )?
572
                   $}x) {
573
                if ($report_type eq "by language") {
369 dpurdie 574
                    next unless (%{$rhaa_Filters_by_Language->{$1}});
229 dpurdie 575
                    # above test necessary to avoid trying to sum reports
576
                    # of reports (which have no language breakdown).
577
                    $found_language = 1;
578
                    $rhh_count->{$1   }{'nFiles' } += $2;
579
                    $rhh_count->{$1   }{'blank'  } += $3;
580
                    $rhh_count->{$1   }{'comment'} += $4;
581
                    $rhh_count->{$1   }{'code'   } += $5;
582
                    $rhh_count->{$1   }{'scaled' } += $7 unless $opt_no3;
583
                } else {
584
                    $rhh_count->{$file}{'nFiles' } += $2;
585
                    $rhh_count->{$file}{'blank'  } += $3;
586
                    $rhh_count->{$file}{'comment'} += $4;
587
                    $rhh_count->{$file}{'code'   } += $5;
588
                    $rhh_count->{$file}{'scaled' } += $7 unless $opt_no3;
589
                }
590
            }
591
        }
592
    }
593
    return $found_language;
594
 
595
} # 1}}}
596
sub generate_report {                        # {{{1
597
    # returns an array of lines containing the results
598
    my ($version    , # in
599
        $elapsed_sec, # in
600
        $report_type, # in  "by language" | "by report file" | "by file"
601
        $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
602
        $rh_scale   , # in
603
       ) = @_;
604
 
605
    my @results       = ();
606
 
607
    my $languages     = ();
608
 
609
    my $sum_files     = 0;
610
    my $sum_code      = 0;
611
    my $sum_blank     = 0;
612
    my $sum_comment   = 0;
613
    foreach my $language (keys %{$rhh_count}) {
614
        $sum_files   += $rhh_count->{$language}{'nFiles'} ;
615
        $sum_blank   += $rhh_count->{$language}{'blank'}  ;
616
        $sum_comment += $rhh_count->{$language}{'comment'};
617
        $sum_code    += $rhh_count->{$language}{'code'}   ;
618
    }
619
    my $sum_lines = $sum_blank + $sum_comment + $sum_code;
620
    $elapsed_sec = 0.5 unless $elapsed_sec;
621
 
622
    my $spacing_1 = 13;
623
    my $spacing_2 =  9;
624
    my $spacing_3 = 17;
625
    if ($opt_no3) {
626
        $spacing_1 = 19;
627
        $spacing_2 = 14;
628
        $spacing_3 = 28;
629
    }
630
    my %Format = (
631
        '1' => { 'xml' => 'name="%s" ',
632
                 'txt' => '%-23s '    ,
633
               },
634
        '2' => { 'xml' => 'name="%s" ',
635
                 'txt' => "\%-${spacing_3}s ",
636
               },
637
        '3' => { 'xml' => 'files_count="%d" ',
638
                 'txt' => '%5d ',
639
               },
640
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
641
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
642
               },
643
        '5' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
644
                 'txt' => ' x %6.2f = %14.2f',
645
               },
646
    );
647
    my $Style = "txt";
648
       $Style = "xml" if $opt_xml ;
649
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
650
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
651
 
652
    my $URL        = "http://cloc.sourceforge.net";
653
    my $hyphen_line = sprintf "%s", '-' x 79;
654
    my $data_line  = "";
655
    my $first_column;
656
    my $BY_LANGUAGE = 0;
657
    my $BY_FILE     = 0;
658
    if      ($report_type eq "by language") {
659
        $first_column = "Language";
660
        $BY_LANGUAGE  = 1;
661
    } elsif ($report_type eq "by file")     {
662
        $first_column = "File";
663
        $BY_FILE      = 1;
664
    } else {
665
        $first_column = "Report File";
666
    }
667
 
668
    my $header_line  = sprintf "%s v %4.2f", $URL, $version;
669
       $header_line .= sprintf("  T=%.1f s (%.1f files/s, %.1f lines/s)",
670
                        $elapsed_sec           ,
671
                        $sum_files/$elapsed_sec,
672
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports;
673
    if      ($opt_xml) {
674
        push @results, "<?xml version=\"1.0\"?>";
675
        push @results, "<results>";
676
        push @results, "<header>$header_line</header>";
677
    } elsif ($opt_yaml) {
678
        push @results, "---\n# $header_line";
679
    } elsif ($opt_csv) {
680
        # append the header to the end of the column headers
681
        # to keep the output a bit cleaner from a spreadsheet
682
        # perspective
683
    } else {
684
        push @results, $header_line;
685
        push @results, $hyphen_line;
686
    }
687
 
688
    if ($Style eq "txt") {
689
        # column headers
690
        $data_line  = sprintf "%-${spacing_1}s ", $first_column;
691
        if ($BY_FILE) {
692
            $data_line .= sprintf "%${spacing_2}s "  , " "    ;
693
        } else {
694
            $data_line .= sprintf "%${spacing_2}s "  , "files";
695
        }
696
        $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
697
            "blank"         ,
698
            "comment"       ,
699
            "code";
700
        $data_line .= sprintf " %8s   %14s",
701
            "scale"         ,
702
            "3rd gen. equiv"
703
              unless $opt_no3;
704
        push @results, $data_line;
705
        push @results, $hyphen_line;
706
    }
707
    if ($opt_csv) {
708
        my $header2 = " ,";
709
        $header2 = " ,files" unless $BY_FILE;
710
        $header2 .= ",blank,comment,code";
711
        $header2 .= ",scale,3rd gen. equiv" unless $opt_no3;
712
        $header2 .= ',"' . $header_line . '"';
713
        push @results, $header2;
714
    }
715
 
716
    my $sum_scaled = 0;
717
    foreach my $lang_or_file (sort {
718
                                 $rhh_count->{$b}{'code'} <=>
719
                                 $rhh_count->{$a}{'code'}
720
                               }
721
                          keys %{$rhh_count}) {
722
        my ($factor, $scaled);
723
        if ($BY_LANGUAGE or $BY_FILE) {
724
            $factor = 1;
725
            if ($BY_LANGUAGE) {
726
                if (defined $rh_scale->{$lang_or_file}) {
727
                    $factor = $rh_scale->{$lang_or_file};
728
                } else {
729
                    warn "No scale factor for $lang_or_file; using 1.00";
730
                }
731
            } else { # by individual code file
732
                $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
733
            }
734
            $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
735
        } else {
736
            if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
737
                $opt_no3 = 1;
738
                # If we're summing together files previously generated
739
                # with --no3 then rhh_count->{$lang_or_file}{'scaled'}
740
                # this variable will be undefined.  That should only
741
                # happen when summing together by file however.
742
            } elsif ($BY_LANGUAGE) {
743
                warn "Missing scaled language info for $lang_or_file\n";
744
            }
745
            unless ($opt_no3) {
746
                $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
747
                $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
748
            }
749
        }
750
 
751
        if ($BY_FILE) {
752
            $data_line  = sprintf $Format{'1'}{$Style}, $lang_or_file;
753
        } else {
754
            $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
755
        }
756
        $data_line .= sprintf $Format{3}{$Style}  , 
757
                        $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
758
        $data_line .= sprintf $Format{4}{$Style}  , 
759
            $rhh_count->{$lang_or_file}{'blank'}  ,
760
            $rhh_count->{$lang_or_file}{'comment'},
761
            $rhh_count->{$lang_or_file}{'code'}   ;
762
        $data_line .= sprintf $Format{5}{$Style}  ,
763
            $factor                               ,
764
            $scaled unless $opt_no3;
765
        $sum_scaled  += $scaled unless $opt_no3;
766
 
767
        if ($opt_xml) {
768
            if (defined $rhh_count->{$lang_or_file}{'lang'}) {
769
                my $lang = $rhh_count->{$lang_or_file}{'lang'};
770
                if (!defined $languages->{$lang}) {
771
                    $languages->{$lang} = $lang;
772
                }
773
                $data_line.=' language="' . $lang . '" ';
774
            }
775
            if ($BY_FILE) {
776
                push @results, "<file " . $data_line . "/>";
777
            } else {
778
                push @results, "<language " . $data_line . "/>";
779
            }
780
        } elsif ($opt_yaml) {
781
            push @results, $lang_or_file . ":";
782
            push @results, "  nFiles: " .$rhh_count->{$lang_or_file}{'nFiles'} ;
783
            push @results, "  blank: "  .$rhh_count->{$lang_or_file}{'blank'}  ;
784
            push @results, "  comment: ".$rhh_count->{$lang_or_file}{'comment'};
785
            push @results, "  code: "   .$rhh_count->{$lang_or_file}{'code'} 
786
                unless $BY_FILE;
787
            if (!$opt_no3) {
788
                push @results, "  scaled: " . $scaled;
789
                push @results, "  factor: " . $factor;
790
            }
791
        } elsif ($opt_csv) {
792
            my $extra_3 = "";
793
               $extra_3 = ",$factor,$scaled" unless $opt_no3;
794
            push @results, $lang_or_file                         . "," .
795
                           $rhh_count->{$lang_or_file}{'nFiles'} . "," .
796
                           $rhh_count->{$lang_or_file}{'blank'}  . "," .
797
                           $rhh_count->{$lang_or_file}{'comment'}. "," .
798
                           $rhh_count->{$lang_or_file}{'code'}         .
799
                           $extra_3;
800
        } else {
801
            push @results, $data_line;
802
        }
803
    }
804
    my $avg_scale = 1;  # weighted average of scale factors
805
       $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code) 
806
            if $sum_code and !$opt_no3;
807
 
808
    if ($opt_xml) {
809
        $data_line = "";
810
        if (!$BY_FILE) {
811
            $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
812
        }
813
        $data_line .= sprintf $Format{'4'}{$Style},
814
            $sum_blank   ,
815
            $sum_comment ,
816
            $sum_code    ;
817
        $data_line .= sprintf $Format{'5'}{$Style},
818
            $avg_scale   ,
819
            $sum_scaled  unless $opt_no3;
820
        push @results, "<total " . $data_line . "/>";
821
        push @results, "<languages>";
822
 
823
        foreach my $language (keys %{$languages}) {
824
            push @results, '<language name="' . $language . '"/>';
825
        }
826
 
827
        push @results, "</languages>";
828
        push @results, "</results>";
829
    } elsif ($opt_yaml) {
830
        push @results, "SUM:";
831
        push @results, "  blank: "  . $sum_blank  ;
832
        push @results, "  code: "   . $sum_code   ;
833
        push @results, "  comment: ". $sum_comment;
834
        push @results, "  nFiles: " . $sum_files unless $BY_FILE;
835
        if (!$opt_no3) {
836
            push @results, "  scaled: " . $sum_scaled;
837
            push @results, "  factor: " . $avg_scale ;
838
        }
839
    } elsif ($opt_csv) {
840
        # do nothing
841
            push @results, "SUMMARY"                         . "," .
842
                           $sum_files . "," .
843
                           $sum_blank  . "," .
844
                           $sum_comment . "," .
845
                           $sum_code    . "," .
846
                           scalar keys %Ignored;
847
 
848
 
849
    } else {
850
 
851
        if ($BY_FILE) {
852
            $data_line  = sprintf "%-23s ", "SUM:"  ;
853
        } else {
854
            $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
855
            $data_line .= sprintf "%${spacing_2}d ", $sum_files;
856
        }
857
        $data_line .= sprintf $Format{'4'}{$Style},
858
            $sum_blank   ,
859
            $sum_comment ,
860
            $sum_code    ;
861
        $data_line .= sprintf $Format{'5'}{$Style},
862
            $avg_scale   ,
863
            $sum_scaled unless $opt_no3;
864
        push @results, $hyphen_line if $sum_files > 1;
865
        push @results, $data_line   if $sum_files > 1;
866
        push @results, $hyphen_line;
867
    }
868
    return @results;
869
} # 1}}}
870
sub print_errors {                           # {{{1
871
    my ($rh_Error_Codes, # in
872
        $raa_errors    , # in
873
       ) = @_;
874
 
875
    my %error_string = reverse(%{$rh_Error_Codes});
876
    my $nErrors      = scalar @{$raa_errors};
877
    printf "\n%d error%s:\n", plural_form(scalar @Errors);
878
    for (my $i = 0; $i < $nErrors; $i++) {
879
        printf "%s:  %s\n", 
880
            $error_string{ $raa_errors->[$i][0] },
881
            $raa_errors->[$i][1] ;
882
    }
883
    print "\n";
884
 
885
} # 1}}}
886
sub write_lang_def {                         # {{{1
887
    my ($file                     ,
888
        $rh_Language_by_Extension , # in
889
        $rh_Language_by_Script    , # in
890
        $rh_Language_by_File      , # in
891
        $rhaa_Filters_by_Language , # in
892
        $rh_Not_Code_Extension    , # in
893
        $rh_Not_Code_Filename     , # in
894
        $rh_Scale_Factor          , # in
895
       ) = @_;
896
 
897
    my $OUT = new IO::File $file, "w";
898
    die "Unable to write to $file\n" unless defined $OUT;
899
 
900
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
901
        next if $language eq "MATLAB/Objective C/MUMPS";
902
        printf $OUT "%s\n", $language;
903
        foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
904
            printf $OUT "    filter %s", $filter->[0];
905
            printf $OUT " %s", $filter->[1] if defined $filter->[1];
906
            print  $OUT "\n";
907
        }
908
        foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
909
            if ($language eq $rh_Language_by_Extension->{$ext}) {
910
                printf $OUT "    extension %s\n", $ext;
911
            }
912
        }
913
        foreach my $filename (sort keys %{$rh_Language_by_File}) {
914
            if ($language eq $rh_Language_by_File->{$filename}) {
915
                printf $OUT "    filename %s\n", $filename;
916
            }
917
        }
918
        foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
919
            if ($language eq $rh_Language_by_Script->{$script_exe}) {
920
                printf $OUT "    script_exe %s\n", $script_exe;
921
            }
922
        }
923
        printf $OUT "    3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
924
    }
925
 
926
    $OUT->close;
927
} # 1}}}
928
sub read_lang_def {                          # {{{1
929
    my ($file                     ,
930
        $rh_Language_by_Extension , # out
931
        $rh_Language_by_Script    , # out
932
        $rh_Language_by_File      , # out
933
        $rhaa_Filters_by_Language , # out
934
        $rh_Not_Code_Extension    , # out
935
        $rh_Not_Code_Filename     , # out
936
        $rh_Scale_Factor          , # out
937
       ) = @_;
938
 
939
    my $IN = new IO::File $file, "r";
940
    die "Unable to read $file.\n" unless defined $IN;
941
 
942
    my $language = "";
943
    while (<$IN>) {
944
        next if /^\s*#/ or /^\s*$/;
945
 
946
        if (/^(\w+.*?)\s*$/) {
947
            $language = $1;
948
            next;
949
        }
950
        die "Missing computer language name, line $. of $file\n"
951
            unless $language;
952
 
953
        if      (/^    filter\s+(\w+)\s*$/) {
954
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
955
 
956
        } elsif (/^    filter\s+(\w+)\s+(.*?)\s*$/) {
957
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
958
 
959
        } elsif (/^    extension\s+(\S+)\s*$/) {
960
            if (defined $rh_Language_by_Extension->{$1}) {
961
                die "File extension collision:  $1 ",
962
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
963
                    "and '$language'\n" ,
964
                    "Edit $file and remove $1 from one of these two ",
965
                    "language definitions.\n";
966
            }
967
            $rh_Language_by_Extension->{$1} = $language;
968
 
969
        } elsif (/^    filename\s+(\S+)\s*$/) {
970
            $rh_Language_by_File->{$1} = $language;
971
 
972
        } elsif (/^    script_exe\s+(\S+)\s*$/) {
973
            $rh_Language_by_Script->{$1} = $language;
974
 
975
        } elsif (/^    3rd_gen_scale\s+(\S+)\s*$/) {
976
            $rh_Scale_Factor->{$language} = $1;
977
 
978
        } else {
979
            die "Unexpected data line $. of $file:\n$_\n";
980
        }
981
 
982
    }
983
    $IN->close;
984
} # 1}}}
985
sub print_extension_info {                   # {{{1
986
    my ($extension,) = @_;
987
    if ($extension) {  # show information on this extension
988
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
989
            # Language_by_Extension{f}    = 'Fortran 77' 
990
            printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext}
991
                if $ext =~ m{$extension}i;
992
        }
993
    } else {           # show information on all  extensions
994
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
995
            # Language_by_Extension{f}    = 'Fortran 77' 
996
            printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext};
997
        }
998
    }
999
} # 1}}}
1000
sub print_language_info {                    # {{{1
1001
    my ($language,) = @_;
1002
    my %extensions = (); # the subset matched by the given $language value
1003
    if ($language) {  # show information on this language
1004
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
1005
            # Language_by_Extension{f}    = 'Fortran 77' 
1006
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
1007
                if $Language_by_Extension{$ext} =~ m{$language}i;
1008
        }
1009
    } else {          # show information on all  languages
1010
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
1011
            # Language_by_Extension{f}    = 'Fortran 77' 
1012
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
1013
        }
1014
    }
1015
 
1016
    # add exceptions (one file extension mapping to multiple languages)
1017
    if (!$language or 
1018
        $language =~ /^(Objective C|MATLAB|MUMPS)$/i) {
1019
        push @{$extensions{'Objective C'}}, "m";
1020
        push @{$extensions{'MATLAB'}}     , "m";
1021
        push @{$extensions{'MUMPS'}}      , "m";
1022
        delete $extensions{'MATLAB/Objective C/MUMPS'};
1023
    }
1024
 
1025
    if (%extensions) {
1026
        foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
1027
            printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
1028
        }
1029
    }
1030
} # 1}}}
1031
sub make_file_list {                         # {{{1
1032
    my ($ra_arg_list,  # in   file and/or directory names to examine
1033
        $rh_Err     ,  # in   hash of error codes
1034
        $raa_errors ,  # out  errors encountered
1035
        $rh_ignored ,  # out  files not recognized as computer languages
1036
        ) = @_;
1037
 
1038
    my ($fh, $filename);
1039
    if ($opt_categorized) {
1040
        $filename = $opt_categorized;
1041
        $fh = new IO::File $filename, "+>";  # open for read/write
1042
        die "Unable to write to $filename:  $!\n" unless defined $fh;
1043
    } elsif ($opt_sdir) {
1044
        # write to the user-defined scratch directory
1045
        $filename = $opt_sdir . '/cloc_file_list.txt';
1046
        $fh = new IO::File $filename, "+>";  # open for read/write
1047
        die "Unable to write to $filename:  $!\n" unless defined $fh;
1048
    } else {
1049
        # let File::Temp create a suitable temporary file
1050
        ($fh, $filename) = tempfile(UNLINK => 1);  # delete file on exit
1051
        print "Using temp file list [$filename]\n" if $opt_v;
1052
    }
1053
 
1054
    my @dir_list = ();
1055
    foreach my $file_or_dir (@{$ra_arg_list}) {
1056
#print "make_file_list file_or_dir=$file_or_dir\n";
1057
        my $size_in_bytes = 0;
1058
        if (!-r $file_or_dir) {
1059
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
1060
            next;
1061
        }
1062
        if (is_file($file_or_dir)) {
1063
            if (!(-s $file_or_dir)) {   # 0 sized file, named pipe, socket 
1064
                $rh_ignored->{$file_or_dir} = 'zero sized file';
1065
                next;
1066
            } elsif (-B $file_or_dir) { # avoid binary files
1067
                $rh_ignored->{$file_or_dir} = 'binary file';
1068
                next;
1069
            }
1070
            push @file_list, "$file_or_dir";
1071
        } elsif (is_dir ($file_or_dir)) {
1072
            push @dir_list, $file_or_dir;
1073
        } else {
1074
            push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir];
1075
            $rh_ignored->{$file_or_dir} = 'not file, not directory';
1076
        }
1077
    }
1078
    foreach my $dir (@dir_list) {
1079
#print "make_file_list dir=$dir\n";
1080
        find(\&files, $dir);  # populates global variable @file_list
1081
    }
1082
    $nFiles_Found = scalar @file_list;
1083
    printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
1084
    write_file($opt_found, sort @file_list) if $opt_found;
1085
 
1086
    my $nFiles_Categorized = 0;
1087
    foreach my $file (@file_list) {
1088
        printf "classifying $file\n" if $opt_v > 2;
1089
 
1090
        my $basename = basename $file;
1091
        if ($Not_Code_Filename{$basename}) {
1092
            $rh_ignored->{$file} = "listed in " . '$' .
1093
                "Not_Code_Filename{$basename}";
1094
            next;
1095
        } elsif ($basename =~ m{~$}) {
1096
            $rh_ignored->{$file} = "temporary editor file";
1097
            next;
1098
        }
1099
 
1100
        my $size_in_bytes = (stat $file)[7];
1101
        my $language      = classify_file($file      ,
1102
                                          $rh_Err    ,
1103
                                          $raa_errors,
1104
                                          $rh_ignored);
1105
die  "make_file_list($file) undef size" unless defined $size_in_bytes;
1106
die  "make_file_list($file) undef lang" unless defined $language;
1107
        printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
1108
        ++$nFiles_Categorized;
1109
        printf "classified %d files\r", 
1110
            $nFiles_Categorized unless $nFiles_Categorized % $opt_progress_rate;
1111
    }
1112
    printf "classified %d files\r", $nFiles_Categorized 
1113
        if !$opt_quiet and $nFiles_Categorized > 1;
1114
 
1115
    return $fh;   # handle to the file containing the list of files to process
1116
}  # 1}}}
1117
sub remove_duplicate_files {                 # {{{1
1118
    my ($fh                   , # in 
1119
        $rh_Language          , # out
1120
        $rh_unique_source_file, # out
1121
        $rh_Err               , # in
1122
        $raa_errors           , # out  errors encountered
1123
        $rh_ignored           , # out
1124
        ) = @_;
1125
 
1126
    # Check for duplicate files by comparing file sizes.
1127
    # Where files are equally sized, compare their MD5 checksums.
1128
 
1129
    my $n = 0;
1130
    my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
1131
    seek($fh, 0, 0); # rewind to beginning of the temp file
1132
    while (<$fh>) {
1133
        ++$n;
1134
        my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
1135
        chomp($file);
1136
        $rh_Language->{$file} = $language;
1137
        push @{$files_by_size{$size_in_bytes}}, $file;
1138
    }
1139
    if ($n > $opt_progress_rate) {
1140
        printf "Duplicate file check %d files (%d known unique)\r", 
1141
            $n, scalar keys %files_by_size;
1142
    }
1143
    $n = 0;
1144
    foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
1145
        ++$n;
1146
        printf "Unique: %8d files                                          \r", 
1147
            $n unless $n % $opt_progress_rate;
1148
        $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
1149
        next unless scalar @{$files_by_size{$bytes}} > 1;
1150
        foreach my $F (different_files(\@{$files_by_size{$bytes}},
1151
                                        $rh_Err     ,
1152
                                        $raa_errors ,
1153
                                        $rh_ignored ) ) {
1154
            $rh_unique_source_file->{$F} = 1;
1155
        }
1156
    }
1157
} # 1}}}
1158
sub files {                                  # {{{1
1159
    # invoked by File::Find's find()   Populates global variable @file_list
1160
    if ($opt_exclude_dir) {
1161
        my $return = 0;
1162
        foreach my $skip_dir (keys %Exclude_Dir) {
1163
            if ($File::Find::dir =~ m{/$skip_dir(/|$)} ) {
1164
                $Ignored{$File::Find::name} = "--exclude_dir=$skip_dir";
1165
                $return = 1;
1166
                last;
1167
            }
1168
        }
1169
        return if $return;
1170
    }
1171
    my $nBytes = -s     $_ ;
1172
    if (!$nBytes and $opt_v > 5) {
1173
        printf "files(%s)  zero size\n", $File::Find::name;
1174
    }
1175
    return unless $nBytes  ; # attempting other tests w/pipe or socket will hang
1176
    my $is_dir = is_dir($_);
1177
    my $is_bin = -B     $_ ;
1178
    printf "files(%s)  size=%d is_dir=%d  -B=%d\n",
1179
        $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
1180
    return if $is_dir or $is_bin;
1181
    ++$nFiles_Found;
1182
    printf "%8d files\r", $nFiles_Found unless $nFiles_Found % $opt_progress_rate;
1183
    push @file_list, $File::Find::name;
1184
} # 1}}}
1185
sub archive_files {                          # {{{1
1186
    # invoked by File::Find's find()  Populates global variable @binary_archive
1187
    foreach my $ext (keys %Known_Binary_Archives) {
1188
        push @binary_archive, $File::Find::name 
1189
            if $File::Find::name =~ m{$ext$};
1190
    }
1191
} # 1}}}
1192
sub is_file {                                # {{{1
1193
    # portable method to test if item is a file
1194
    # (-f doesn't work in ActiveState Perl on Windows)
1195
    my $item = shift @_;
1196
 
1197
    if ($ON_WINDOWS) {
1198
        my $mode = (stat $item)[2];
1199
           $mode = 0 unless $mode;
1200
        if ($mode & 0100000) { return 1; } 
1201
        else                 { return 0; }
1202
    } else {
1203
        return (-f $item);  # works on Unix, Linux, CygWin, z/OS
1204
    }
1205
} # 1}}}
1206
sub is_dir {                                 # {{{1
1207
    # portable method to test if item is a directory
1208
    # (-d doesn't work in ActiveState Perl on Windows)
1209
    my $item = shift @_;
1210
 
1211
    if ($ON_WINDOWS) {
1212
        my $mode = (stat $item)[2];
1213
           $mode = 0 unless $mode;
1214
        if ($mode & 0040000) { return 1; } 
1215
        else                 { return 0; }
1216
    } else {
1217
        return (-d $item);  # works on Unix, Linux, CygWin, z/OS
1218
    }
1219
} # 1}}}
1220
sub classify_file {                          # {{{1
1221
    my ($full_file   , # in
1222
        $rh_Err      , # in   hash of error codes
1223
        $raa_errors  , # out
1224
        $rh_ignored  , # out
1225
       ) = @_;
1226
 
1227
    print "-> classify_file($full_file)\n" if $opt_v > 2;
1228
    my $language = "(unknown)";
1229
 
1230
    my $look_at_first_line = 0;
1231
    my $file = basename $full_file; 
1232
    return $language if $Not_Code_Filename{$file}; # (unknown)
1233
    return $language if $file =~ m{~$}; # a temp edit file (unknown)
1234
 
1235
    if ($file =~ /\.(\w+)$/) { # has an extension
1236
        print "$full_file extension=[$1]\n" if $opt_v > 2;
1237
        my $extension = $1;
1238
        if ($Not_Code_Extension{$extension} and 
1239
           !$Forced_Extension{$extension}) {
1240
           # If .1 (for example) is an extention that would ordinarily be
1241
           # ignored but the user has insisted this be counted with the
1242
           # --force-lang option, then go ahead and count it.
1243
            $rh_ignored->{$full_file} = 
1244
                'listed in $Not_Code_Extension{' . $extension . '}';
1245
            return $language;
1246
        }
1247
        if (defined $Language_by_Extension{$extension}) {
1248
            if ($Language_by_Extension{$extension} eq
1249
                'MATLAB/Objective C/MUMPS') {
1250
                my $lang_M_or_O = "";
1251
                matlab_or_objective_C($full_file , 
1252
                                      $rh_Err    ,
1253
                                      $raa_errors,
1254
                                     \$lang_M_or_O);
1255
                if ($lang_M_or_O) {
1256
                    return $lang_M_or_O;
1257
                } else { # an error happened in matlab_or_objective_C()
1258
                    $rh_ignored->{$full_file} = 
1259
                        'failure in matlab_or_objective_C()';
1260
                    return $language; # (unknown)
1261
                }
1262
            } else {
1263
                return $Language_by_Extension{$extension};
1264
            }
1265
        } else { # has an unmapped file extension
1266
            $look_at_first_line = 1;
1267
        }
1268
    } elsif (defined $Language_by_File{lc $file}) {
1269
        return $Language_by_File{lc $file};
1270
    } else {  # no file extension
1271
        $look_at_first_line = 1;
1272
    }
1273
 
1274
    if ($look_at_first_line) {
1275
        # maybe it is a shell/Perl/Python/Ruby/etc script that
1276
        # starts with pound bang:
1277
        #   #!/usr/bin/perl
1278
        #   #!/usr/bin/env perl
1279
        my $script_language = peek_at_first_line($full_file , 
1280
                                                 $rh_Err    , 
1281
                                                 $raa_errors);
1282
        if (!$script_language) {
1283
            $rh_ignored->{$full_file} = "language unknown (#2)";
1284
            # returns (unknown)
1285
        }
1286
        if (defined $Language_by_Script{$script_language}) {
1287
            if (defined $Filters_by_Language{
1288
                            $Language_by_Script{$script_language}}) {
1289
                $language = $Language_by_Script{$script_language};
1290
            } else {
1291
                $rh_ignored->{$full_file} = 
1292
                    "undefined:  Filters_by_Language{" . 
1293
                    $Language_by_Script{$script_language} .
1294
                    "} for scripting language $script_language";
1295
                # returns (unknown)
1296
            }
1297
        } else {
1298
            $rh_ignored->{$full_file} = "language unknown (#3)";
1299
            # returns (unknown)
1300
        }
1301
    }
1302
    print "<- classify_file($full_file)\n" if $opt_v > 2;
1303
    return $language;
1304
} # 1}}}
1305
sub peek_at_first_line {                     # {{{1
1306
    my ($file        , # in
1307
        $rh_Err      , # in   hash of error codes
1308
        $raa_errors  , # out
1309
       ) = @_;
1310
 
1311
    print "-> peek_at_first_line($file)\n" if $opt_v > 2;
1312
 
1313
    my $script_language = "";
1314
    if (!-r $file) {
1315
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
1316
        return $script_language;
1317
    }
1318
    my $IN = new IO::File $file, "r";
1319
    if (!defined $IN) {
1320
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
1321
        print "<- peek_at_first_line($file)\n" if $opt_v > 2;
1322
        return $script_language;
1323
    }
1324
    chomp(my $first_line = <$IN>);
1325
    if (defined $first_line) {
1326
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
1327
        if ($first_line =~ /^#\!\s*(\S.*?)$/) {
1328
#print "peek_at_first_line 1=[$1]\n";
1329
            my @pound_bang = split(' ', $1);
1330
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
1331
            if (basename($pound_bang[0]) eq "env" and 
1332
                scalar @pound_bang > 1) {
1333
                $script_language = $pound_bang[1];
1334
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
1335
            } else {
1336
                $script_language = basename $pound_bang[0];
1337
#print "peek_at_first_line pound_bang B $script_language\n";
1338
            }
1339
        }
1340
    }
1341
    $IN->close;
1342
    print "<- peek_at_first_line($file)\n" if $opt_v > 2;
1343
    return $script_language;
1344
} # 1}}}
1345
sub different_files {                        # {{{1
1346
    # See which of the given files are unique by computing each file's MD5
1347
    # sum.  Return the subset of files which are unique.
1348
    my ($ra_files    , # in
1349
        $rh_Err      , # in
1350
        $raa_errors  , # out
1351
        $rh_ignored  , # out
1352
       ) = @_;
1353
 
1354
    print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
1355
    my %file_hash = ();  # file_hash{ md5 hash } = file name
1356
    foreach my $F (@{$ra_files}) {
1357
        next if is_dir($F);  # needed for Windows
1358
        my $IN = new IO::File $F, "r";
1359
        if (!defined $IN) {
1360
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
1361
            $rh_ignored->{$F} = 'cannot read';
1362
        } else {
1363
            if ($HAVE_Digest_MD5) {
1364
                binmode $IN;
1365
                $file_hash{ Digest::MD5->new->addfile($IN)->hexdigest } = $F;
1366
            } else {
1367
                # all files treated unique
1368
                $file_hash{ $F } = $F;
1369
            }
1370
            $IN->close;
1371
        }
1372
    }
1373
    my @unique = values %file_hash;
1374
    print "<- different_files(@unique)\n" if $opt_v > 2;
1375
    return @unique;
1376
} # 1}}}
1377
sub call_counter {                           # {{{1
1378
    my ($file    , # in
1379
        $language, # in
1380
       ) = @_;
1381
 
1382
    # Logic:  pass the file through the following filters:
1383
    #         1. remove blank lines
1384
    #         2. remove comments using each filter defined for this language
1385
    #            (example:  SQL has two, remove_starts_with(--) and 
1386
    #             remove_c_comments() )
1387
    #         3. compute comment lines as 
1388
    #               total lines - blank lines - lines left over after all
1389
    #                   comment filters have been applied
1390
 
1391
    print "-> call_counter($file, $language)\n" if $opt_v > 2;
1392
    my @routines = @{$Filters_by_Language{$language}};
1393
#print "call_counter:  ", Dumper(@routines), "\n";
1394
 
1395
    my $IN = new IO::File $file, "r";
1396
    my @lines = <$IN>;
1397
    $IN->close;
1398
    # Some files don't end with a new line.  Force this:
1399
    $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
1400
 
1401
    my $total_lines = scalar @lines;
1402
 
1403
    print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
1404
    if ($language eq "COBOL") {
1405
        @lines = remove_cobol_blanks(\@lines);
1406
    } else {
1407
        @lines = remove_matches(\@lines, '^\s*$'); # removes blank lines
1408
    }
1409
    my $blank_lines = $total_lines - scalar @lines;
1410
    print_lines($file, "Blank lines removed:", \@lines) 
1411
        if $opt_print_filter_stages;
1412
 
1413
    foreach my $call_string (@routines) {
1414
#print "call_counter:  call_string=", Dumper($call_string), "\n";
1415
        my $subroutine = $call_string->[0];
1416
        if (! defined &{$subroutine}) {
1417
            warn "call_counter undefined subroutine $subroutine for $file\n";
1418
            next;
1419
        }
1420
        print "call_counter file=$file sub=$subroutine\n" if $opt_v > 1;
1421
        my @args  = @{$call_string};
1422
        shift @args; # drop the subroutine name
1423
        if (@args and $args[0] eq '>filename<') {
1424
            shift   @args;
1425
            unshift @args, $file;
1426
        }
1427
 
1428
        no strict 'refs';
1429
        @lines = &{$subroutine}(\@lines, @args);   # apply filter...
1430
 
1431
        print_lines($file, "After $subroutine(@args)", \@lines) 
1432
            if $opt_print_filter_stages;
1433
        @lines = remove_matches(\@lines, '^\s*$'); # ...then remove blank lines
1434
        print_lines($file, "post $subroutine(@args) blanks cleanup:", \@lines) 
1435
            if $opt_print_filter_stages;
1436
    }
1437
    my $comment_lines = $total_lines - $blank_lines - scalar  @lines;
1438
    if ($opt_strip_comments) {
1439
        my $stripped_file = basename $file . ".$opt_strip_comments";
1440
        write_file($stripped_file, @lines);
1441
    }
1442
 
1443
    print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n" 
1444
        if $opt_v > 2;
1445
    return ($total_lines, $blank_lines, $comment_lines);
1446
} # 1}}}
1447
sub write_file {                             # {{{1
1448
    my ($file  , # in
1449
        @lines , # in
1450
       ) = @_;
1451
 
1452
    print "-> write_file($file)\n" if $opt_v > 2;
1453
 
1454
    # Create the destination directory if it doesn't already exist.
1455
    my $abs_file_path = File::Spec->rel2abs( $file );
1456
    my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
1457
    mkpath($volume . $directories, 1, 0777);
1458
 
1459
    my $OUT = new IO::File $file, "w";
1460
    if (defined $OUT) {
1461
        chomp(@lines);
1462
        print $OUT join("\n", @lines), "\n";
1463
        $OUT->close;
1464
    } else {
1465
        warn "Unable to write to $file\n";
1466
    }
1467
    print "Wrote $file\n";
1468
 
1469
    print "<- write_file\n" if $opt_v > 2;
1470
} # 1}}}
1471
sub remove_f77_comments {                    # {{{1
1472
    my ($ra_lines, ) = @_;
1473
    print "-> remove_f77_comments\n" if $opt_v > 2;
1474
 
1475
    my @save_lines = ();
1476
    foreach (@{$ra_lines}) {
1477
        next if m{^[*cC]};
1478
        push @save_lines, $_;
1479
    }
1480
 
1481
    print "<- remove_f77_comments\n" if $opt_v > 2;
1482
    return @save_lines;
1483
} # 1}}}
1484
sub remove_f90_comments {                    # {{{1
1485
    # derived from SLOCCount
1486
    my ($ra_lines, ) = @_;
1487
    print "-> remove_f90_comments\n" if $opt_v > 2;
1488
 
1489
    my @save_lines = ();
1490
    foreach (@{$ra_lines}) {
1491
        # a comment is              m/^\s*!/
1492
        # an empty line is          m/^\s*$/
1493
        # a HPF statement is        m/^\s*!hpf\$/i
1494
        # an Open MP statement is   m/^\s*!omp\$/i
1495
        if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
1496
            push @save_lines, $_;
1497
        }
1498
    }
1499
 
1500
    print "<- remove_f90_comments\n" if $opt_v > 2;
1501
    return @save_lines;
1502
} # 1}}}
1503
sub remove_matches {                         # {{{1
1504
    my ($ra_lines, # in
1505
        $pattern , # in   Perl regular expression (case insensitive)
1506
       ) = @_;
1507
    print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
1508
 
1509
    my @save_lines = ();
1510
    foreach (@{$ra_lines}) {
1511
        next if m{$pattern}i;
1512
        push @save_lines, $_;
1513
    }
1514
 
1515
    print "<- remove_matches\n" if $opt_v > 2;
1516
    return @save_lines;
1517
} # 1}}}
1518
sub remove_above {                           # {{{1
1519
    my ($ra_lines, $marker, ) = @_;
1520
    print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
1521
 
1522
    # Make two passes through the code:
1523
    # 1. check if the marker exists
1524
    # 2. remove anything above the marker if it exists,
1525
    #    do nothing if the marker does not exist
1526
 
1527
    # Pass 1
1528
    my $found_marker = 0;
1529
    for (my $line_number  = 1;
1530
            $line_number <= scalar @{$ra_lines};
1531
            $line_number++) {
1532
        if ($ra_lines->[$line_number-1] =~ m{$marker}) {
1533
            $found_marker = $line_number;
1534
            last;
1535
        }
1536
    }
1537
 
1538
    # Pass 2 only if needed
1539
    my @save_lines = ();
1540
    if ($found_marker) {
1541
        my $n = 1;
1542
        foreach (@{$ra_lines}) {
1543
            push @save_lines, $_
1544
                if $n >= $found_marker;
1545
            ++$n;
1546
        }
1547
    } else { # marker wasn't found; save all lines
1548
        foreach (@{$ra_lines}) {
1549
            push @save_lines, $_;
1550
        }
1551
    }
1552
 
1553
    print "<- remove_above\n" if $opt_v > 2;
1554
    return @save_lines;
1555
} # 1}}}
1556
sub remove_below {                           # {{{1
1557
    my ($ra_lines, $marker, ) = @_;
1558
    print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
1559
 
1560
    my @save_lines = ();
1561
    foreach (@{$ra_lines}) {
1562
        last if m{$marker};
1563
        push @save_lines, $_;
1564
    }
1565
 
1566
    print "<- remove_below\n" if $opt_v > 2;
1567
    return @save_lines;
1568
} # 1}}}
1569
sub remove_between {                         # {{{1
1570
    my ($ra_lines, $marker, ) = @_;
1571
    # $marker must contain one of the balanced pairs understood
1572
    # by Regexp::Common::balanced, namely
1573
    # '{}'  '()'  '[]'  or  '<>'
1574
 
1575
    print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
1576
    my %acceptable = ('{}'=>1,  '()'=>1,  '[]'=>1,  '<>'=>1, );
1577
    die "remove_between:  invalid delimiter '$marker'\n",
1578
        "the delimiter must be one of these four pairs:\n",
1579
        "{}  ()  []  <>\n" unless
1580
        $acceptable{$marker};
1581
 
1582
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
1583
 
1584
    my $all_lines = join("", @{$ra_lines});
1585
 
1586
    no strict 'vars';
1587
    # otherwise get:
1588
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
1589
    if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
1590
        no warnings; 
1591
        $all_lines =~ s/$1//g;
1592
    }
1593
 
1594
    print "<- remove_between\n" if $opt_v > 2;
1595
    return split("\n", $all_lines);
1596
} # 1}}}
1597
sub remove_cobol_blanks {                    # {{{1
1598
    # subroutines derived from SLOCCount
1599
    my ($ra_lines, ) = @_;
1600
 
1601
    my $free_format = 0;  # Support "free format" source code.
1602
    my @save_lines  = ();
1603
 
1604
    foreach (@{$ra_lines}) {
1605
        next if m/^\s*$/;
1606
        my $line = expand($_);  # convert tabs to equivalent spaces
1607
        $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
1608
        if ($free_format) {
1609
            push @save_lines, $_;
1610
        } else {
1611
            push @save_lines, $_ unless m/^\d{6}\s*$/ or
1612
                              ($line =~ m/^\d{6}\s{66}/);
1613
        }
1614
    }
1615
    return @save_lines;
1616
} # 1}}}
1617
sub remove_cobol_comments {                  # {{{1
1618
    # subroutines derived from SLOCCount
1619
    my ($ra_lines, ) = @_;
1620
 
1621
    my $free_format = 0;  # Support "free format" source code.
1622
    my @save_lines  = ();
1623
 
1624
    foreach (@{$ra_lines}) {
1625
        if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
1626
        if ($free_format) {
1627
            push @save_lines, $_ unless m{^\s*\*};
1628
        } else {
1629
            push @save_lines, $_ unless m{^......\*} or m{^\*};
1630
        }
1631
    }
1632
    return @save_lines;
1633
} # 1}}}
1634
sub remove_jcl_comments {                    # {{{1
1635
    my ($ra_lines, ) = @_;
1636
 
1637
    print "-> remove_jcl_comments\n" if $opt_v > 2;
1638
 
1639
    my @save_lines = ();
1640
    my $in_comment = 0;
1641
    foreach (@{$ra_lines}) {
1642
        next if /^\s*$/;
1643
        next if m{^\s*//\*};
1644
        last if m{^\s*//\s*$};
1645
        push @save_lines, $_;
1646
    }
1647
 
1648
    print "<- remove_jcl_comments\n" if $opt_v > 2;
1649
    return @save_lines;
1650
} # 1}}}
1651
sub remove_jsp_comments {                    # {{{1
1652
    #  JSP comment is   <%--  body of comment   --%>
1653
    my ($ra_lines, ) = @_;
1654
 
1655
    print "-> remove_jsp_comments\n" if $opt_v > 2;
1656
 
1657
    my @save_lines = ();
1658
    my $in_comment = 0;
1659
    foreach (@{$ra_lines}) {
1660
 
1661
        next if /^\s*$/;
1662
        s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
1663
        next if /^\s*$/;
1664
        if ($in_comment) {
1665
            if (/\-\-\%>/) {
1666
                s/^.*?\-\-\%>//;
1667
                $in_comment = 0;
1668
            }
1669
        }
1670
        next if /^\s*$/;
1671
        $in_comment = 1 if /^(.*?)<\%\-\-/;
1672
        next if defined $1 and $1 =~ /^\s*$/;
1673
        next if ($in_comment);
1674
        push @save_lines, $_;
1675
    }
1676
 
1677
    print "<- remove_jsp_comments\n" if $opt_v > 2;
1678
    return @save_lines;
1679
} # 1}}}
1680
sub remove_html_comments {                   # {{{1
1681
    #  HTML comment is   <!--  body of comment   -->
1682
    #  Need to use my own routine until the HTML comment regex in
1683
    #  the Regexp::Common module can handle  <!--  --  -->
1684
    my ($ra_lines, ) = @_;
1685
 
1686
    print "-> remove_html_comments\n" if $opt_v > 2;
1687
 
1688
    my @save_lines = ();
1689
    my $in_comment = 0;
1690
    foreach (@{$ra_lines}) {
1691
 
1692
        next if /^\s*$/;
1693
        s/<!\-\-.*?\-\->//g;  # strip one-line comments
1694
        next if /^\s*$/;
1695
        if ($in_comment) {
1696
            if (/\-\->/) {
1697
                s/^.*?\-\->//;
1698
                $in_comment = 0;
1699
            }
1700
        }
1701
        next if /^\s*$/;
1702
        $in_comment = 1 if /^(.*?)<!\-\-/;
1703
        next if defined $1 and $1 =~ /^\s*$/;
1704
        next if ($in_comment);
1705
        push @save_lines, $_;
1706
    }
1707
 
1708
    print "<- remove_html_comments\n" if $opt_v > 2;
1709
    return @save_lines;
1710
} # 1}}}
1711
sub determine_lit_type {                     # {{{1
1712
  my ($file) = @_;
1713
 
1714
  open (FILE, $file);
1715
  while (<FILE>) {
1716
    if (m/^\\begin{code}/) { close FILE; return 2; }
1717
    if (m/^>\s/) { close FILE; return 1; }
1718
  }
1719
 
1720
  return 0;
1721
} # 1}}}
1722
sub remove_haskell_comments {                # {{{1
1723
    # Bulk of code taken from SLOCCount's haskell_count script.
1724
    # Strips out {- .. -} and -- comments and counts the rest.
1725
    # Pragmas, {-#...}, are counted as SLOC.
1726
    # BUG: Doesn't handle strings with embedded block comment markers gracefully.
1727
    #      In practice, that shouldn't be a problem.
1728
    my ($ra_lines, $file, ) = @_;
1729
 
1730
    print "-> remove_haskell_comments\n" if $opt_v > 2;
1731
 
1732
    my @save_lines = ();
1733
    my $in_comment = 0;
1734
    my $incomment  = 0;
1735
    my ($literate, $inlitblock) = (0,0);
1736
 
1737
    $literate = 1 if $file =~ /\.lhs$/;
1738
    if($literate) { $literate = determine_lit_type($file) }
1739
 
1740
    foreach (@{$ra_lines}) {
1741
        if ($literate == 1) {
1742
            if (!s/^>//) { s/.*//; }
1743
        } elsif ($literate == 2) {
1744
            if ($inlitblock) {
1745
                if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; }
1746
            } elsif (!$inlitblock) {
1747
                if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; }
1748
                else { s/.*//; }
1749
            }
1750
        }
1751
 
1752
        if ($incomment) {
1753
            if (m/\-\}/) { s/^.*?\-\}//;  $incomment = 0;}
1754
            else { s/.*//; }
1755
        }
1756
        if (!$incomment) {
1757
            s/--.*//;
1758
            s!{-[^#].*?-}!!g;
1759
            if (m/{-/ && (!m/{-#/)) {
1760
              s/{-.*//;
1761
              $incomment = 1;
1762
            }
1763
        }
1764
        if (m/\S/) { push @save_lines, $_; }
1765
    }
1766
#   if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
1767
 
1768
    print "<- remove_haskell_comments\n" if $opt_v > 2;
1769
    return @save_lines;
1770
} # 1}}}
1771
sub print_lines {                            # {{{1
1772
    my ($file     , # in
1773
        $title    , # in
1774
        $ra_lines , # in
1775
       ) = @_;
1776
    printf "->%-30s %s\n", $file, $title;
1777
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
1778
        printf "%5d | %s", $i+1, $ra_lines->[$i];
1779
        print "\n" unless $ra_lines->[$i] =~ m{\n$}
1780
    }
1781
} # 1}}}
1782
sub set_constants {                          # {{{1
1783
    my ($rh_Language_by_Extension , # out
1784
        $rh_Language_by_Script    , # out
1785
        $rh_Language_by_File      , # out
1786
        $rhaa_Filters_by_Language , # out
1787
        $rh_Not_Code_Extension    , # out
1788
        $rh_Not_Code_Filename     , # out
1789
        $rh_Scale_Factor          , # out
1790
        $rh_Known_Binary_Archives , # out
1791
       ) = @_;
1792
# 1}}}
1793
%{$rh_Language_by_Extension} = (                 # {{{1
1794
            'abap'        => 'ABAP'                  ,
1795
            'ac'          => 'm4'                    ,
1796
            'ada'         => 'Ada'                   ,
1797
            'adb'         => 'Ada'                   ,
1798
            'ads'         => 'Ada'                   ,
1799
            'adso'        => 'ADSO/IDSM'             ,
1800
            'am'          => 'make'                  ,
1801
            'asa'         => 'ASP'                   ,
1802
            'asax'        => 'ASP.Net'               ,
1803
            'ascx'        => 'ASP.Net'               ,
1804
            'asm'         => 'Assembler'             ,
1805
            'asmx'        => 'ASP.Net'               ,
1806
            'asp'         => 'ASP'                   ,
1807
            'aspx'        => 'ASP.Net'               ,
1808
            'master'      => 'ASP.Net'               ,
1809
            'sitemap'     => 'ASP.Net'               ,
1810
            'awk'         => 'awk'                   ,
1811
            'bash'        => 'Bourne Again Shell'    ,
1812
            'bas'         => 'Visual Basic'          ,
1813
            'bat'         => 'DOS Batch'             ,
1814
            'BAT'         => 'DOS Batch'             ,
1815
            'cbl'         => 'COBOL'                 ,
1816
            'CBL'         => 'COBOL'                 ,
1817
            'c'           => 'C'                     ,
1818
            'C'           => 'C++'                   ,
1819
            'cc'          => 'C++'                   ,
1820
            'ccs'         => 'CCS'                   ,
1821
            'cfm'         => 'ColdFusion'            ,
1822
            'cl'          => 'Lisp'                  ,
1823
            'cls'         => 'Visual Basic'          ,
1824
            'cob'         => 'COBOL'                 ,
1825
            'COB'         => 'COBOL'                 ,
1826
            'config'      => 'ASP.Net'               ,
1827
            'cpp'         => 'C++'                   ,
1828
            'cs'          => 'C#'                    ,
1829
            'csh'         => 'C Shell'               ,
1830
            'css'         => "CSS"                   ,
1831
            'cxx'         => 'C++'                   ,
1832
            'da'          => 'DAL'                   ,
1833
            'def'         => 'Teamcenter def'        ,
1834
            'dmap'        => 'NASTRAN DMAP'          ,
1835
            'dpr'         => 'Pascal'                ,
1836
            'dtd'         => 'DTD'                   ,
1837
            'ec'          => 'C'                     ,
1838
            'el'          => 'Lisp'                  ,
1839
            'exp'         => 'Expect'                ,
1840
            'f77'         => 'Fortran 77'            ,
1841
            'F77'         => 'Fortran 77'            ,
1842
            'f90'         => 'Fortran 90'            ,
1843
            'F90'         => 'Fortran 90'            ,
1844
            'f95'         => 'Fortran 95'            ,
1845
            'F95'         => 'Fortran 95'            ,
1846
            'f'           => 'Fortran 77'            ,
1847
            'F'           => 'Fortran 77'            ,
1848
            'fmt'         => 'Oracle Forms'          ,
1849
            'focexec'     => 'Focus'                 ,
1850
            'frm'         => 'Visual Basic'          ,
1851
            'gnumakefile' => 'make'                  ,
1852
            'Gnumakefile' => 'make'                  ,
1853
            'h'           => 'C/C++ Header'          ,
1854
            'H'           => 'C/C++ Header'          ,
1855
            'hh'          => 'C/C++ Header'          ,
1856
            'hpp'         => 'C/C++ Header'          ,
1857
            'hs'          => 'Haskell'               , 
1858
            'htm'         => 'HTML'                  ,
1859
            'html'        => 'HTML'                  ,
1860
            'i3'          => 'Modula3'               ,
1861
            'idl'         => 'IDL'                   ,
1862
            'ig'          => 'Modula3'               ,
1863
            'inc'         => 'inc'                   , # might be PHP
1864
            'itk'         => 'Tcl/Tk'                ,
1865
            'java'        => 'Java'                  ,
1866
            'jcl'         => 'JCL'                   , # IBM Job Control Lang.
1867
            'jl'          => 'Lisp'                  ,
1868
            'js'          => 'Javascript'            ,
1869
            'jsp'         => 'JSP'                   , # Java server pages
1870
            'ksh'         => 'Korn Shell'            ,
1871
            'lhs'         => 'Haskell'               ,
1872
            'l'           => 'lex'                   ,
1873
            'lsp'         => 'Lisp'                  ,
1874
            'lua'         => 'Lua'                   ,
1875
            'm3'          => 'Modula3'               ,
1876
            'm4'          => 'm4'                    ,
1877
            'makefile'    => 'make'                  ,
1878
            'Makefile'    => 'make'                  ,
1879
            'met'         => 'Teamcenter met'        ,
1880
            'mg'          => 'Modula3'               , 
1881
            'mli'         => 'ML'                    ,
1882
            'ml'          => 'ML'                    , 
1883
            'm'           => 'MATLAB/Objective C/MUMPS' ,
1884
            'wdproj'      => 'MSBuild scripts' ,
1885
            'csproj'      => 'MSBuild scripts' ,
1886
            'mps'         => 'MUMPS'                 ,
1887
            'mth'         => 'Teamcenter mth'        ,
1888
            'oscript'     => 'LiveLink OScript'      ,
1889
            'pad'         => 'Ada'                   , # Oracle Ada preprocessor
1890
            'pas'         => 'Pascal'                ,
1891
            'pcc'         => 'C++'                   , # Oracle C++ preprocessor
1892
            'perl'        => 'Perl'                  ,
1893
            'pfo'         => 'Fortran 77'            ,
1894
            'pgc'         => 'C'                     , # Postgres embedded C/C++
1895
            'php3'        => 'PHP'                   ,
1896
            'php4'        => 'PHP'                   ,
1897
            'php5'        => 'PHP'                   ,
1898
            'php'         => 'PHP'                   ,
1899
            'plh'         => 'Perl'                  ,
1900
            'pl'          => 'Perl'                  ,
1901
            'PL'          => 'Perl'                  ,
1902
            'plx'         => 'Perl'                  ,
1903
            'pm'          => 'Perl'                  ,
1904
            'p'           => 'Pascal'                ,
1905
            'pp'          => 'Pascal'                ,
1906
            'psql'        => 'SQL'                   ,
1907
            'py'          => 'Python'                ,
1908
            'rb'          => 'Ruby'                  ,
1909
         #  'resx'        => 'ASP.Net'               ,
1910
            'rex'         => 'Oracle Reports'        ,
1911
            'rexx'        => 'Rexx'                  ,
1912
            's'           => 'Assembler'             ,
1913
            'S'           => 'Assembler'             ,
1914
            'sbl'         => 'Softbridge Basic'      ,
1915
            'SBL'         => 'Softbridge Basic'      ,
1916
            'sc'          => 'Lisp'                  ,
1917
            'scm'         => 'Lisp'                  ,
1918
            'sed'         => 'sed'                   ,
1919
            'ses'         => 'Patran Command Language'   ,
1920
            'sh'          => 'Bourne Shell'          ,
1921
            'sql'         => 'SQL'                   ,
1922
            'tcl'         => 'Tcl/Tk'                ,
1923
            'tcsh'        => 'C Shell'               ,
1924
            'tk'          => 'Tcl/Tk'                ,
1925
            'vba'         => 'Visual Basic'          ,
1926
         #  'vbp'         => 'Visual Basic'          , # .vbp - autogenerated
1927
            'vb'          => 'Visual Basic'          ,
1928
         #  'vbw'         => 'Visual Basic'          , # .vbw - autogenerated
1929
            'vbs'         => 'Visual Basic'          ,
1930
            'webinfo'     => 'ASP.Net'               ,
1931
            'xml'         => 'XML'                   ,
1932
            'build'       => 'NAnt scripts'          ,
1933
            'vim'         => 'vim script'            ,
1934
            'xsd'         => 'XSD'                   ,
1935
            'xslt'        => 'XSLT'                  ,
1936
            'xsl'         => 'XSLT'                  ,
1937
            'y'           => 'yacc'                  ,
1938
            'yaml'        => 'YAML'                  ,
1939
            );
1940
# 1}}}
1941
%{$rh_Language_by_Script}    = (                 # {{{1
1942
            'awk'      => 'awk'                   ,
1943
            'bash'     => 'Bourne Again Shell'    ,
1944
            'bc'       => 'bc'                    ,# calculator
1945
            'csh'      => 'C Shell'               ,
1946
            'idl'      => 'IDL'                   ,
1947
            'ksh'      => 'Korn Shell'            ,
1948
            'make'     => 'make'                  ,
1949
            'octave'   => 'Octave'                ,
1950
            'perl5'    => 'Perl'                  ,
1951
            'perl'     => 'Perl'                  ,
1952
            'ruby'     => 'Ruby'                  ,
1953
            'sed'      => 'sed'                   ,
1954
            'sh'       => 'Bourne Shell'          ,
1955
            'tcl'      => 'Tcl/Tk'                ,
1956
            'tcsh'     => 'C Shell'               ,
1957
            'wish'     => 'Tcl/Tk'                ,
1958
            );
1959
# 1}}}
1960
%{$rh_Language_by_File}      = (                 # {{{1
1961
            'Makefile'    => 'make'                  ,
1962
            'makefile'    => 'make'                  ,
1963
            'gnumakefile' => 'make'                  ,
1964
            'Gnumakefile' => 'make'                  ,
1965
            );
1966
# 1}}}
1967
%{$rhaa_Filters_by_Language} = (                 # {{{1
1968
    'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
1969
    'ASP'                => [   [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
1970
    'ASP.Net'            => [   [ 'call_regexp_common'  , 'C'      ], ],
1971
    'Ada'                => [   [ 'remove_matches'      , '^\s*--' ], ],
1972
    'ADSO/IDSM'          => [   [ 'remove_matches'      , '^\s*\*[\+\!]' ], ],
1973
    'Assembler'          => [  
1974
                                [ 'remove_matches'      , '^\s*//' ],
1975
                                [ 'remove_matches'      , '^\s*;'  ],
1976
                                [ 'call_regexp_common'  , 'C'      ], 
1977
                            ],
1978
    'awk'                => [   [ 'remove_matches'      , '^\s*#'  ], ], 
1979
    'bc'                 => [   [ 'remove_matches'      , '^\s*#'  ], ], 
1980
    'C'                  => [   
1981
                                [ 'remove_matches'      , '^\s*//' ], # C99
1982
                                [ 'call_regexp_common'  , 'C'      ], ], 
1983
    'C++'                => [   
1984
                                [ 'remove_matches'      , '^\s*//' ], 
1985
                                [ 'call_regexp_common'  , 'C'      ],
1986
                            ],
1987
    'C/C++ Header'       => [   [ 'call_regexp_common'  , 'C'      ], ],
1988
    'C#'                 => [   
1989
                                [ 'remove_matches'      , '^\s*//' ], 
1990
                                [ 'call_regexp_common'  , 'C'      ],
1991
                            ],
1992
    'CCS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
1993
    'CSS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
1994
    'COBOL'              => [   [ 'remove_cobol_comments',         ], ],
1995
    'ColdFusion'         => [   [ 'remove_html_comments',          ],
1996
                                [ 'call_regexp_common'  , 'HTML'   ], ],
1997
    'Crystal Reports'    => [   [ 'remove_matches'      , '^\s*//' ], ],
1998
    'DAL'                => [   [ 'remove_between'      , '[]',    ], ],
1999
    'NASTRAN DMAP'       => [   [ 'remove_matches'      , '^\s*\$' ], ],
2000
    'DOS Batch'          => [   [ 'remove_matches'      , '^\s*rem', ], ],
2001
    'DTD'                => [   [ 'remove_html_comments',          ],
2002
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2003
    'Expect'             => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2004
    'Focus'              => [   [ 'remove_matches'      , '^\s*\-\*'  ], ],
2005
    'Fortran 77'         => [   [ 'remove_f77_comments' ,          ], ],
2006
    'Fortran 90'         => [   [ 'remove_f77_comments' ,          ],
2007
                                [ 'remove_f90_comments' ,          ], ],
2008
    'Fortran 95'         => [   [ 'remove_f77_comments' ,          ],
2009
                                [ 'remove_f90_comments' ,          ], ],
2010
    'HTML'               => [   [ 'remove_html_comments',          ],
2011
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2012
    'Haskell'            => [   [ 'remove_haskell_comments', '>filename<' ], ],
2013
    'IDL'                => [   [ 'remove_matches'      , '^\s*;'  ], ],
2014
    'JSP'                => [   [ 'remove_html_comments',          ],
2015
                                [ 'call_regexp_common'  , 'HTML'   ],
2016
                                [ 'remove_jsp_comments',           ], ],
2017
    'Java'               => [   
2018
                                [ 'remove_matches'      , '^\s*//' ], 
2019
                                [ 'call_regexp_common'  , 'C'      ],
2020
                            ],
2021
    'Javascript'         => [   
2022
                                [ 'remove_matches'      , '^\s*//' ], 
2023
                                [ 'call_regexp_common'  , 'C'      ],
2024
                            ],
2025
    'JCL'                => [   [ 'remove_jcl_comments' ,          ], ],
2026
    'Lisp'               => [   [ 'remove_matches'      , '^\s*;'  ], ],
2027
    'LiveLink OScript'   => [   [ 'remove_matches'      , '^\s*//' ], ],
2028
    'Lua'                => [   [ 'call_regexp_common'  , 'lua'    ], ],
2029
    'make'               => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2030
    'MATLAB'             => [   [ 'remove_matches'      , '^\s*%'  ], ], 
2031
    'Modula3'            => [   [ 'call_regexp_common'  , 'Pascal' ], ],
2032
        # Modula 3 comments are (* ... *) so applying the Pascal filter
2033
        # which also treats { ... } as a comment is not really correct.
2034
    'Objective C'        => [   [ 'call_regexp_common'  , 'C'      ], ], 
2035
    'MATLAB/Objective C/MUMPS' => [ [ 'die' ,          ], ], # never called
2036
    'MUMPS'              => [   [ 'remove_matches'      , '^\s*;'  ], ], 
2037
    'Octave'             => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2038
    'Oracle Forms'       => [   [ 'call_regexp_common'  , 'C'      ], ],
2039
    'Oracle Reports'     => [   [ 'call_regexp_common'  , 'C'      ], ],
2040
    'Pascal'             => [   [ 'call_regexp_common'  , 'Pascal' ], ],
2041
    'Patran Command Language'=> [   
2042
                                [ 'remove_matches'      , '^\s*#'   ], 
2043
                                [ 'remove_matches'      , '^\s*\$#' ], 
2044
                                [ 'call_regexp_common'  , 'C'       ],
2045
                            ],
2046
    'Perl'               => [   [ 'remove_below'        , '^__(END|DATA)__'],
2047
                                [ 'remove_matches'      , '^\s*#'  ], ], 
2048
    'Python'             => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2049
    'PHP'                => [   
2050
                                [ 'remove_matches'      , '^\s*#'  ],
2051
                                [ 'remove_matches'      , '^\s*//' ], 
2052
                                [ 'call_regexp_common'  , 'C'      ], 
2053
                            ],
2054
    'Rexx'               => [   [ 'call_regexp_common'  , 'C'      ], ],
2055
    'Ruby'               => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2056
    'SQL'                => [   
2057
                                [ 'remove_matches'      , '^\s*--' ],
2058
                                [ 'call_regexp_common'  , 'C'      ], 
2059
                            ],
2060
    'sed'                => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2061
    'Bourne Again Shell' => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2062
    'Bourne Shell'       => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2063
    'm4'                 => [   [ 'remove_matches'      , '^dnl '  ], ], 
2064
    'C Shell'            => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2065
    'Korn Shell'         => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2066
    'Tcl/Tk'             => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2067
    'Teamcenter def'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2068
    'Teamcenter met'     => [   [ 'call_regexp_common'  , 'C'      ], ],
2069
    'Teamcenter mth'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
2070
    'Softbridge Basic'   => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
2071
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
2072
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
2073
    'vim script'         => [   [ 'remove_matches'      , '^\s*"'  ], ],
2074
    'Visual Basic'       => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
2075
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
2076
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
2077
    'yacc'               => [   [ 'call_regexp_common'  , 'C'      ], ],
2078
    'YAML'               => [   [ 'remove_matches'      , '^\s*#'  ], ],
2079
    'lex'                => [   [ 'call_regexp_common'  , 'C'      ], ],
2080
    'XML'                => [   [ 'remove_html_comments',          ],
2081
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2082
    'XSD'                => [   [ 'remove_html_comments',          ],
2083
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2084
    'XSLT'               => [   [ 'remove_html_comments',          ],
2085
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2086
    'NAnt scripts'       => [   [ 'remove_html_comments',          ],
2087
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2088
    'MSBuild scripts'    => [   [ 'remove_html_comments',          ],
2089
                                [ 'call_regexp_common'  , 'HTML'   ], ],
2090
    );
2091
# 1}}}
2092
%{$rh_Not_Code_Extension}    = (                 # {{{1
2093
   '1'       => 1,  # Man pages (documentation):
2094
   '2'       => 1,
2095
   '3'       => 1,
2096
   '4'       => 1,
2097
   '5'       => 1,
2098
   '6'       => 1,
2099
   '7'       => 1,
2100
   '8'       => 1,
2101
   '9'       => 1,
2102
   'a'       => 1,  # Static object code.
2103
   'ad'      => 1,  # X application default resource file.
2104
   'afm'     => 1,  # font metrics
2105
   'arc'     => 1,  # arc(1) archive
2106
   'arj'     => 1,  # arj(1) archive
2107
   'au'      => 1,  # Audio sound filearj(1) archive
2108
   'bak'     => 1,  # Backup files - we only want to count the "real" files.
2109
   'bdf'     => 1,
2110
   'bmp'     => 1,
2111
   'bz2'     => 1,  # bzip2(1) compressed file
2112
   'csv'     => 1,  # comma separated values
2113
   'desktop' => 1,
2114
   'dic'     => 1,
2115
   'doc'     => 1,
2116
   'elc'     => 1,
2117
   'eps'     => 1,
2118
   'fig'     => 1,
2119
   'gif'     => 1,
2120
   'gz'      => 1,
2121
   'hdf'     => 1,  # hierarchical data format
2122
   'in'      => 1,  # Debatable.
2123
   'jpg'     => 1,
2124
   'kdelnk'  => 1,
2125
   'man'     => 1,
2126
   'mf'      => 1,
2127
   'mp3'     => 1,
2128
   'n'       => 1,
2129
   'o'       => 1,  # Object code is generated from source code.
2130
   'pbm'     => 1,
2131
   'pdf'     => 1,
2132
   'pfb'     => 1,
2133
   'png'     => 1,
2134
   'po'      => 1,
2135
   'ps'      => 1,  # Postscript is _USUALLY_ generated automatically.
2136
   'sgm'     => 1,
2137
   'sgml'    => 1,
2138
   'so'      => 1,  # Dynamically-loaded object code.
2139
   'Tag'     => 1,
2140
   'tex'     => 1,
2141
   'text'    => 1,
2142
   'tfm'     => 1,
2143
   'tgz'     => 1,  # gzipped tarball
2144
   'tiff'    => 1,
2145
   'txt'     => 1, 
2146
   'vf'      => 1,
2147
   'wav'     => 1,
2148
   'xbm'     => 1,
2149
   'xpm'     => 1,
2150
   'Y'       => 1,  # file compressed with "Yabba"
2151
   'Z'       => 1,  # file compressed with "compress"
2152
   'zip'     => 1,  # zip archive
2153
); # 1}}}
2154
%{$rh_Not_Code_Filename}     = (                 # {{{1
2155
   'AUTHORS'     => 1,
2156
   'README'      => 1,
2157
   'Readme'      => 1,
2158
   'readme'      => 1,
2159
   'README.tk'   => 1, # used in kdemultimedia, it's confusing.
2160
   'Changelog'   => 1,
2161
   'ChangeLog'   => 1,
2162
   'Repository'  => 1,
2163
   'CHANGES'     => 1,
2164
   'Changes'     => 1,
2165
   '.cvsignore'  => 1,
2166
   'Root'        => 1, # CVS
2167
   'BUGS'        => 1,
2168
   'TODO'        => 1,
2169
   'COPYING'     => 1,
2170
   'MAINTAINERS' => 1,
2171
   'Entries'     => 1,
2172
   'iconfig.h'   => 1, # Skip "iconfig.h" files; they're used in Imakefiles
2173
                       # (used in xlockmore):
2174
);
2175
# 1}}}
2176
%{$rh_Scale_Factor}          = (                 # {{{1
2177
    '1032/af'                      =>   5.00,
2178
    '1st generation default'       =>   0.25,
2179
    '2nd generation default'       =>   0.75,
2180
    '3rd generation default'       =>   1.00,
2181
    '4th generation default'       =>   4.00,
2182
    '5th generation default'       =>  16.00,
2183
    'aas macro'                    =>   0.88,
2184
    'abap/4'                       =>   5.00,
2185
    'ABAP'                         =>   5.00,
2186
    'accel'                        =>   4.21,
2187
    'access'                       =>   2.11,
2188
    'actor'                        =>   3.81,
2189
    'acumen'                       =>   2.86,
2190
    'Ada'                          =>   0.52,
2191
    'Ada 83'                       =>   1.13,
2192
    'Ada 95'                       =>   1.63,
2193
    'adr/dl'                       =>   2.00,
2194
    'adr/ideal/pdl'                =>   4.00,
2195
    'ads/batch'                    =>   4.00,
2196
    'ads/online'                   =>   4.00,
2197
    'ADSO/IDSM'                    =>   3.00,
2198
    'advantage'                    =>   2.11,
2199
    'ai shell default'             =>   1.63,
2200
    'ai shells'                    =>   1.63,
2201
    'algol 68'                     =>   0.75,
2202
    'algol w'                      =>   0.75,
2203
    'ambush'                       =>   2.50,
2204
    'aml'                          =>   1.63,
2205
    'amppl ii'                     =>   1.25,
2206
    'ansi basic'                   =>   1.25,
2207
    'ansi cobol 74'                =>   0.75,
2208
    'ansi cobol 85'                =>   0.88,
2209
    'SQL'                          =>   6.15,
2210
    'answer/db'                    =>   6.15,
2211
    'apl 360/370'                  =>   2.50,
2212
    'apl default'                  =>   2.50,
2213
    'apl*plus'                     =>   2.50,
2214
    'applesoft basic'              =>   0.63,
2215
    'application builder'          =>   4.00,
2216
    'application manager'          =>   2.22,
2217
    'aps'                          =>   0.96,
2218
    'aps'                          =>   4.71,
2219
    'apt'                          =>   1.13,
2220
    'aptools'                      =>   4.00,
2221
    'arc'                          =>   1.63,
2222
    'ariel'                        =>   0.75,
2223
    'arity'                        =>   1.63,
2224
    'arity prolog'                 =>   1.25,
2225
    'art'                          =>   1.63,
2226
    'art enterprise'               =>   1.74,
2227
    'artemis'                      =>   2.00,
2228
    'artim'                        =>   1.74,
2229
    'as/set'                       =>   4.21,
2230
    'asi/inquiry'                  =>   6.15,
2231
    'ask windows'                  =>   1.74,
2232
'asa'                         =>   1.29,
2233
'ASP'                         =>   1.29,
2234
'ASP.Net'                     =>   1.29,
2235
'aspx'                        =>   1.29,
2236
#'resx'                        =>   1.29,
2237
'asax'                        =>   1.29,
2238
'ascx'                        =>   1.29,
2239
'asmx'                        =>   1.29,
2240
'config'                      =>   1.29,
2241
'webinfo'                     =>   1.29,
2242
'CCS'                         =>   5.33,
2243
 
2244
#   'assembler (basic)'            =>   0.25,
2245
    'Assembler'                    =>   0.25,
2246
 
2247
    'assembler (macro)'            =>   0.51,
2248
    'associative default'          =>   1.25,
2249
    'autocoder'                    =>   0.25,
2250
    'awk'                          =>   3.81,
2251
    'aztec c'                      =>   0.63,
2252
    'balm'                         =>   0.75,
2253
    'base sas'                     =>   1.51,
2254
    'basic'                        =>   0.75,
2255
    'basic a'                      =>   0.63,
2256
#   'basic assembly'               =>   0.25,
2257
    'bc'                           =>   1.50,
2258
    'berkeley pascal'              =>   0.88,
2259
    'better basic'                 =>   0.88,
2260
    'bliss'                        =>   0.75,
2261
    'bmsgen'                       =>   2.22,
2262
    'boeingcalc'                   =>  13.33,
2263
    'bteq'                         =>   6.15,
2264
 
2265
    'C'                            =>   0.77,
2266
 
2267
    'c set 2'                      =>   0.88,
2268
 
2269
    'C#'                           =>   1.36,
2270
 
2271
    'C++'                          =>   1.51,
2272
 
2273
    'c86plus'                      =>   0.63,
2274
    'cadbfast'                     =>   2.00,
2275
    'caearl'                       =>   2.86,
2276
    'cast'                         =>   1.63,
2277
    'cbasic'                       =>   0.88,
2278
    'cdadl'                        =>   4.00,
2279
    'cellsim'                      =>   1.74,
2280
'ColdFusion'               =>   4.00,
2281
    'chili'                        =>   0.75,
2282
    'chill'                        =>   0.75,
2283
    'cics'                         =>   1.74,
2284
    'clarion'                      =>   1.38,
2285
    'clascal'                      =>   1.00,
2286
    'cli'                          =>   2.50,
2287
    'clipper'                      =>   2.05,
2288
    'clipper db'                   =>   2.00,
2289
    'clos'                         =>   3.81,
2290
    'clout'                        =>   2.00,
2291
    'cms2'                         =>   0.75,
2292
    'cmsgen'                       =>   4.21,
2293
    'COBOL'                        =>   1.04,
2294
    'COBOL ii'                     =>   0.75,
2295
    'COBOL/400'                    =>   0.88,
2296
    'cobra'                        =>   4.00,
2297
    'codecenter'                   =>   2.22,
2298
    'cofac'                        =>   2.22,
2299
    'cogen'                        =>   2.22,
2300
    'cognos'                       =>   2.22,
2301
    'cogo'                         =>   1.13,
2302
    'comal'                        =>   1.00,
2303
    'comit ii'                     =>   1.25,
2304
    'common lisp'                  =>   1.25,
2305
    'concurrent pascal'            =>   1.00,
2306
    'conniver'                     =>   1.25,
2307
    'cool:gen/ief'                 =>   2.58,
2308
    'coral 66'                     =>   0.75,
2309
    'corvet'                       =>   4.21,
2310
    'corvision'                    =>   5.33,
2311
    'cpl'                          =>   0.50,
2312
    'Crystal Reports'              =>   4.00,
2313
    'csl'                          =>   1.63,
2314
    'csp'                          =>   1.51,
2315
    'cssl'                         =>   1.74,
2316
 
2317
'CSS' => 1.0,
2318
 
2319
    'culprit'                      =>   1.57,
2320
    'cxpert'                       =>   1.63,
2321
    'cygnet'                       =>   4.21,
2322
    'DAL'                          =>   1.50,
2323
    'data base default'            =>   2.00,
2324
    'dataflex'                     =>   2.00,
2325
    'datatrieve'                   =>   4.00,
2326
    'dbase iii'                    =>   2.00,
2327
    'dbase iv'                     =>   1.54,
2328
    'dcl'                          =>   0.38,
2329
    'decision support default'     =>   2.22,
2330
    'decrally'                     =>   2.00,
2331
    'delphi'                       =>   2.76,
2332
    'dl/1'                         =>   2.00,
2333
    'NASTRAN DMAP'                 =>   2.35,
2334
    'dna4'                         =>   4.21,
2335
    'DOS Batch'                    =>   0.63,
2336
    'dsp assembly'                 =>   0.50,
2337
    'dtabl'                        =>   1.74,
2338
    'dtipt'                        =>   1.74,
2339
    'dyana'                        =>   1.13,
2340
    'dynamoiii'                    =>   1.74,
2341
    'easel'                        =>   2.76,
2342
    'easy'                         =>   1.63,
2343
    'easytrieve+'                  =>   2.35,
2344
    'eclipse'                      =>   1.63,
2345
    'eda/sql'                      =>   6.67,
2346
    'edscheme 3.4'                 =>   1.51,
2347
    'eiffel'                       =>   3.81,
2348
    'enform'                       =>   1.74,
2349
    'englishbased default'         =>   1.51,
2350
    'ensemble'                     =>   2.76,
2351
    'epos'                         =>   4.00,
2352
    'erlang'                       =>   2.00,
2353
    'esf'                          =>   2.00,
2354
    'espadvisor'                   =>   1.63,
2355
    'espl/i'                       =>   1.13,
2356
    'euclid'                       =>   0.75,
2357
    'excel'                        =>   1.74,
2358
    'excel 12'                     =>  13.33,
2359
    'excel 34'                     =>  13.33,
2360
    'excel 5'                      =>  13.33,
2361
    'express'                      =>   2.22,
2362
    'exsys'                        =>   1.63,
2363
    'extended common lisp'         =>   1.43,
2364
    'eznomad'                      =>   2.22,
2365
    'facets'                       =>   4.00,
2366
    'factorylink iv'               =>   2.76,
2367
    'fame'                         =>   2.22,
2368
    'filemaker pro'                =>   2.22,
2369
    'flavors'                      =>   2.76,
2370
    'flex'                         =>   1.74,
2371
    'flexgen'                      =>   2.76,
2372
    'Focus'                        =>   1.90,
2373
    'foil'                         =>   1.51,
2374
    'forte'                        =>   4.44,
2375
    'forth'                        =>   1.25,
2376
    'Fortran 66'                   =>   0.63,
2377
    'Fortran 77'                   =>   0.75,
2378
    'Fortran 90'                   =>   1.00,
2379
    'Fortran 95'                   =>   1.13,
2380
    'Fortran II'                   =>   0.63,
2381
    'foundation'                   =>   2.76,
2382
    'foxpro'                       =>   2.29,
2383
    'foxpro 1'                     =>   2.00,
2384
    'foxpro 2.5'                   =>   2.35,
2385
    'framework'                    =>  13.33,
2386
    'g2'                           =>   1.63,
2387
    'gamma'                        =>   5.00,
2388
    'genascript'                   =>   2.96,
2389
    'gener/ol'                     =>   6.15,
2390
    'genexus'                      =>   5.33,
2391
    'genifer'                      =>   4.21,
2392
    'geode 2.0'                    =>   5.00,
2393
    'gfa basic'                    =>   2.35,
2394
    'gml'                          =>   1.74,
2395
    'golden common lisp'           =>   1.25,
2396
    'gpss'                         =>   1.74,
2397
    'guest'                        =>   2.86,
2398
    'guru'                         =>   1.63,
2399
    'gw basic'                     =>   0.82,
2400
    'Haskell'                      =>   2.11,
2401
    'high c'                       =>   0.63,
2402
    'hlevel'                       =>   1.38,
2403
    'hp basic'                     =>   0.63,
2404
 
2405
'HTML'          => 1.90 ,
2406
'XML'           => 1.90 ,
2407
'XSLT'          => 1.90 ,
2408
'DTD'           => 1.90 ,
2409
'XSD'           => 1.90 ,
2410
'NAnt scripts'    => 1.90 ,
2411
'MSBuild scripts' => 1.90 , 
2412
 
2413
    'HTML 2'                       =>   5.00,
2414
    'HTML 3'                       =>   5.33,
2415
    'huron'                        =>   5.00,
2416
    'ibm adf i'                    =>   4.00,
2417
    'ibm adf ii'                   =>   4.44,
2418
    'ibm advanced basic'           =>   0.82,
2419
    'ibm cics/vs'                  =>   2.00,
2420
    'ibm compiled basic'           =>   0.88,
2421
    'ibm vs cobol'                 =>   0.75,
2422
    'ibm vs cobol ii'              =>   0.88,
2423
    'ices'                         =>   1.13,
2424
    'icon'                         =>   1.00,
2425
    'ideal'                        =>   1.54,
2426
    'idms'                         =>   2.00,
2427
    'ief'                          =>   5.71,
2428
    'ief/cool:gen'                 =>   2.58,
2429
    'iew'                          =>   5.71,
2430
    'ifps/plus'                    =>   2.50,
2431
    'imprs'                        =>   2.00,
2432
    'informix'                     =>   2.58,
2433
    'ingres'                       =>   2.00,
2434
    'inquire'                      =>   6.15,
2435
    'insight2'                     =>   1.63,
2436
    'install/1'                    =>   5.00,
2437
    'intellect'                    =>   1.51,
2438
    'interlisp'                    =>   1.38,
2439
    'interpreted basic'            =>   0.75,
2440
    'interpreted c'                =>   0.63,
2441
    'iqlisp'                       =>   1.38,
2442
    'iqrp'                         =>   6.15,
2443
    'j2ee'                         =>   1.60,
2444
    'janus'                        =>   1.13,
2445
    'Java'                         =>   1.36,
2446
'Javascript'                   =>   1.48,
2447
'JSP'                          =>   1.48,
2448
    'JCL'                          =>   1.67,
2449
    'joss'                         =>   0.75,
2450
    'jovial'                       =>   0.75,
2451
    'jsp'                          =>   1.36,
2452
    'kappa'                        =>   2.00,
2453
    'kbms'                         =>   1.63,
2454
    'kcl'                          =>   1.25,
2455
    'kee'                          =>   1.63,
2456
    'keyplus'                      =>   2.00,
2457
    'kl'                           =>   1.25,
2458
    'klo'                          =>   1.25,
2459
    'knowol'                       =>   1.63,
2460
    'krl'                          =>   1.38,
2461
    'Korn Shell'                   =>   3.81,
2462
    'ladder logic'                 =>   2.22,
2463
    'lambit/l'                     =>   1.25,
2464
    'lattice c'                    =>   0.63,
2465
    'liana'                        =>   0.63,
2466
    'lilith'                       =>   1.13,
2467
    'linc ii'                      =>   5.71,
2468
    'Lisp'                         =>   1.25,
2469
    'LiveLink OScript'             =>   3.5 ,
2470
    'loglisp'                      =>   1.38,
2471
    'loops'                        =>   3.81,
2472
    'lotus 123 dos'                =>  13.33,
2473
    'lotus macros'                 =>   0.75,
2474
    'lotus notes'                  =>   3.64,
2475
    'lucid 3d'                     =>  13.33,
2476
    'lyric'                        =>   1.51,
2477
    'm'                            =>   5.00,
2478
    'macforth'                     =>   1.25,
2479
    'mach1'                        =>   2.00,
2480
    'machine language'             =>   0.13,
2481
    'maestro'                      =>   5.00,
2482
    'magec'                        =>   5.00,
2483
    'magik'                        =>   3.81,
2484
    'Lake'                         =>   3.81,
2485
    'make'                         =>   2.50,
2486
    'mantis'                       =>   2.96,
2487
    'mapper'                       =>   0.99,
2488
    'mark iv'                      =>   2.00,
2489
    'mark v'                       =>   2.22,
2490
    'mathcad'                      =>  16.00,
2491
    'mdl'                          =>   2.22,
2492
    'mentor'                       =>   1.51,
2493
    'mesa'                         =>   0.75,
2494
    'microfocus cobol'             =>   1.00,
2495
    'microforth'                   =>   1.25,
2496
    'microsoft c'                  =>   0.63,
2497
    'microstep'                    =>   4.00,
2498
    'miranda'                      =>   2.00,
2499
    'model 204'                    =>   2.11,
2500
    'modula 2'                     =>   1.00,
2501
    'mosaic'                       =>  13.33,
2502
    # 'ms c ++ v. 7'                 =>   1.51,
2503
    'ms compiled basic'            =>   0.88,
2504
    'msl'                          =>   1.25,
2505
    'mulisp'                       =>   1.25,
2506
    'MUMPS'                        =>   4.21,
2507
    'Nastran'                      =>   1.13,
2508
    'natural'                      =>   1.54,
2509
    'natural 1'                    =>   1.51,
2510
    'natural 2'                    =>   1.74,
2511
    'natural construct'            =>   3.20,
2512
    'natural language'             =>   0.03,
2513
    'netron/cap'                   =>   4.21,
2514
    'nexpert'                      =>   1.63,
2515
    'nial'                         =>   1.63,
2516
    'nomad2'                       =>   2.00,
2517
    'nonprocedural default'        =>   2.22,
2518
    'notes vip'                    =>   2.22,
2519
    'nroff'                        =>   1.51,
2520
    'object assembler'             =>   1.25,
2521
    'object lisp'                  =>   2.76,
2522
    'object logo'                  =>   2.76,
2523
    'object pascal'                =>   2.76,
2524
    'object star'                  =>   5.00,
2525
    'Objective C'                  =>   2.96,
2526
    'objectoriented default'       =>   2.76,
2527
    'objectview'                   =>   3.20,
2528
    'ogl'                          =>   1.00,
2529
    'omnis 7'                      =>   2.00,
2530
    'oodl'                         =>   2.76,
2531
    'ops'                          =>   1.74,
2532
    'ops5'                         =>   1.38,
2533
    'oracle'                       =>   2.76,
2534
    'Oracle Reports'               =>   2.76,
2535
    'Oracle Forms'                 =>   2.67,
2536
    'Oracle Developer/2000'        =>   3.48,
2537
    'oscar'                        =>   0.75,
2538
    'pacbase'                      =>   1.67,
2539
    'pace'                         =>   2.00,
2540
    'paradox/pal'                  =>   2.22,
2541
    'Pascal'                       =>   0.88,
2542
    'Patran Command Language'      =>   2.50,
2543
    'pc focus'                     =>   2.22,
2544
    'pdl millenium'                =>   3.81,
2545
    'pdp11 ade'                    =>   1.51,
2546
    'peoplesoft'                   =>   2.50,
2547
    'Perl'                         =>   4.00,
2548
    'persistance object builder'   =>   3.81,
2549
    'pilot'                        =>   1.51,
2550
    'pl/1'                         =>   1.38,
2551
    'pl/m'                         =>   1.13,
2552
    'pl/s'                         =>   0.88,
2553
    'pl/sql'                       =>   2.58,
2554
    'planit'                       =>   1.51,
2555
    'planner'                      =>   1.25,
2556
    'planperfect 1'                =>  11.43,
2557
    'plato'                        =>   1.51,
2558
    'polyforth'                    =>   1.25,
2559
    'pop'                          =>   1.38,
2560
    'poplog'                       =>   1.38,
2561
    'power basic'                  =>   1.63,
2562
    'powerbuilder'                 =>   3.33,
2563
    'powerhouse'                   =>   5.71,
2564
    'ppl (plus)'                   =>   2.00,
2565
    'problemoriented default'      =>   1.13,
2566
    'proc'                         =>   2.96,
2567
    'procedural default'           =>   0.75,
2568
    'professional pascal'          =>   0.88,
2569
    'program generator default'    =>   5.00,
2570
    'progress v4'                  =>   2.22,
2571
    'proiv'                        =>   1.38,
2572
    'prolog'                       =>   1.25,
2573
    'prose'                        =>   0.75,
2574
    'proteus'                      =>   0.75,
2575
    'qbasic'                       =>   1.38,
2576
    'qbe'                          =>   6.15,
2577
    'qmf'                          =>   5.33,
2578
    'qnial'                        =>   1.63,
2579
    'quattro'                      =>  13.33,
2580
    'quattro pro'                  =>  13.33,
2581
    'query default'                =>   6.15,
2582
    'quick basic 1'                =>   1.25,
2583
    'quick basic 2'                =>   1.31,
2584
    'quick basic 3'                =>   1.38,
2585
    'quick c'                      =>   0.63,
2586
    'quickbuild'                   =>   2.86,
2587
    'quiz'                         =>   5.33,
2588
    'rally'                        =>   2.00,
2589
    'ramis ii'                     =>   2.00,
2590
    'rapidgen'                     =>   2.86,
2591
    'ratfor'                       =>   0.88,
2592
    'rdb'                          =>   2.00,
2593
    'realia'                       =>   1.74,
2594
    'realizer 1.0'                 =>   2.00,
2595
    'realizer 2.0'                 =>   2.22,
2596
    'relate/3000'                  =>   2.00,
2597
    'reuse default'                =>  16.00,
2598
    'Rexx'                         =>   1.19,
2599
    'Rexx (mvs)'                   =>   1.00,
2600
    'Rexx (os/2)'                  =>   1.74,
2601
    'rm basic'                     =>   0.88,
2602
    'rm cobol'                     =>   0.75,
2603
    'rm fortran'                   =>   0.75,
2604
    'rpg i'                        =>   1.00,
2605
    'rpg ii'                       =>   1.63,
2606
    'rpg iii'                      =>   1.63,
2607
    'rtexpert 1.4'                 =>   1.38,
2608
    'sabretalk'                    =>   0.90,
2609
    'sail'                         =>   0.75,
2610
    'sapiens'                      =>   5.00,
2611
    'sas'                          =>   1.95,
2612
    'savvy'                        =>   6.15,
2613
    'sbasic'                       =>   0.88,
2614
    'sceptre'                      =>   1.13,
2615
    'scheme'                       =>   1.51,
2616
    'screen painter default'       =>  13.33,
2617
    'sequal'                       =>   6.67,
2618
    'Bourne Shell'                 =>   3.81,
2619
    'Bourne Again Shell'           =>   3.81,
2620
    'ksh'                          =>   3.81,
2621
    'C Shell'                      =>   3.81,
2622
    'siebel tools '                =>   6.15,
2623
    'simplan'                      =>   2.22,
2624
    'simscript'                    =>   1.74,
2625
    'simula'                       =>   1.74,
2626
    'simula 67'                    =>   1.74,
2627
    'simulation default'           =>   1.74,
2628
    'slogan'                       =>   0.98,
2629
    'smalltalk'                    =>   2.50,
2630
    'smalltalk 286'                =>   3.81,
2631
    'smalltalk 80'                 =>   3.81,
2632
    'smalltalk/v'                  =>   3.81,
2633
    'snap'                         =>   1.00,
2634
    'snobol24'                     =>   0.63,
2635
    'softscreen'                   =>   5.71,
2636
    'Softbridge Basic'             =>   2.76,
2637
    'solo'                         =>   1.38,
2638
    'speakeasy'                    =>   2.22,
2639
    'spinnaker ppl'                =>   2.22,
2640
    'splus'                        =>   2.50,
2641
    'spreadsheet default'          =>  13.33,
2642
    'sps'                          =>   0.25,
2643
    'spss'                         =>   2.50,
2644
    'SQL'                          =>   2.29,
2645
    'sqlwindows'                   =>   6.67,
2646
    'statistical default'          =>   2.50,
2647
    'strategem'                    =>   2.22,
2648
    'stress'                       =>   1.13,
2649
    'strongly typed default'       =>   0.88,
2650
    'style'                        =>   1.74,
2651
    'superbase 1.3'                =>   2.22,
2652
    'surpass'                      =>  13.33,
2653
    'sybase'                       =>   2.00,
2654
    'symantec c++'                 =>   2.76,
2655
    'symbolang'                    =>   1.25,
2656
    'synchroworks'                 =>   4.44,
2657
    'synon/2e'                     =>   4.21,
2658
    'systemw'                      =>   2.22,
2659
    'tandem access language'       =>   0.88,
2660
    'Tcl/Tk'                       =>   1.25,
2661
    'Teamcenter def'               =>   1.00,
2662
    'Teamcenter met'               =>   1.00,
2663
    'Teamcenter mth'               =>   1.00,
2664
    'telon'                        =>   5.00,
2665
    'tessaract'                    =>   2.00,
2666
    'the twin'                     =>  13.33,
2667
    'themis'                       =>   6.15,
2668
    'tiief'                        =>   5.71,
2669
    'topspeed c++'                 =>   2.76,
2670
    'transform'                    =>   5.33,
2671
    'translisp plus'               =>   1.43,
2672
    'treet'                        =>   1.25,
2673
    'treetran'                     =>   1.25,
2674
    'trs80 basic'                  =>   0.63,
2675
    'true basic'                   =>   1.25,
2676
    'turbo c'                      =>   0.63,
2677
    # 'turbo c++'                    =>   1.51,
2678
    'turbo expert'                 =>   1.63,
2679
    'turbo pascal >5'              =>   1.63,
2680
    'turbo pascal 14'              =>   1.00,
2681
    'turbo pascal 45'              =>   1.13,
2682
    'turbo prolog'                 =>   1.00,
2683
    'turing'                       =>   1.00,
2684
    'tutor'                        =>   1.51,
2685
    'twaice'                       =>   1.63,
2686
    'ucsd pascal'                  =>   0.88,
2687
    'ufo/ims'                      =>   2.22,
2688
    'uhelp'                        =>   2.50,
2689
    'uniface'                      =>   5.00,
2690
    # 'unix shell scripts'           =>   3.81,
2691
    'vax acms'                     =>   1.38,
2692
    'vax ade'                      =>   2.00,
2693
    'vbscript'                     =>   2.35,
2694
    'vectran'                      =>   0.75,
2695
    'vhdl '                        =>   4.21,
2696
    'vim script'                   =>   3.00,
2697
    'visible c'                    =>   1.63,
2698
    'visible cobol'                =>   2.00,
2699
    'visicalc 1'                   =>   8.89,
2700
    'visual 4.0'                   =>   2.76,
2701
    'visual basic'                 =>   1.90,
2702
    'visual basic 1'               =>   1.74,
2703
    'visual basic 2'               =>   1.86,
2704
    'visual basic 3'               =>   2.00,
2705
    'visual basic 4'               =>   2.22,
2706
    'visual basic 5'               =>   2.76,
2707
    'Visual Basic'                 =>   2.76,
2708
    'visual basic dos'             =>   2.00,
2709
    'visual c++'                   =>   2.35,
2710
    'visual cobol'                 =>   4.00,
2711
    'visual objects'               =>   5.00,
2712
    'visualage'                    =>   3.81,
2713
    'visualgen'                    =>   4.44,
2714
    'vpf'                          =>   0.84,
2715
    'vsrexx'                       =>   2.50,
2716
    'vulcan'                       =>   1.25,
2717
    'vz programmer'                =>   2.22,
2718
    'warp x'                       =>   2.00,
2719
    'watcom c'                     =>   0.63,
2720
    'watcom c/386'                 =>   0.63,
2721
    'waterloo c'                   =>   0.63,
2722
    'waterloo pascal'              =>   0.88,
2723
    'watfiv'                       =>   0.94,
2724
    'watfor'                       =>   0.88,
2725
    'web scripts'                  =>   5.33,
2726
    'whip'                         =>   0.88,
2727
    'wizard'                       =>   2.86,
2728
    'xlisp'                        =>   1.25,
2729
    'yacc'                         =>   1.51,
2730
    'yacc++'                       =>   1.51,
2731
    'YAML'                         =>   0.90,
2732
    'zbasic'                       =>   0.88,
2733
    'zim'                          =>   4.21,
2734
    'zlisp'                        =>   1.25,
2735
 
2736
'Expect'  => 2.00,
2737
'C/C++ Header'  => 1.00, 
2738
'inc'     => 1.00,
2739
'lex'     => 1.00,
2740
'MATLAB'  => 4.00,
2741
'IDL'     => 3.80,
2742
'Octave'  => 4.00,
2743
'ML'      => 3.00,
2744
'Modula3' => 2.00,
2745
'PHP'     => 3.50,
2746
'Python'  => 4.20,
2747
'Ruby'    => 4.20,
2748
'sed'     => 4.00,
2749
'Lua'     => 4.00,
2750
);
2751
# 1}}}
2752
%{$rh_Known_Binary_Archives} = (                 # {{{1
2753
            '.tar'     => 1 ,
2754
            '.tar.Z'   => 1 ,
2755
            '.tar.gz'  => 1 ,
2756
            '.tar.bz2' => 1 ,
2757
            '.zip'     => 1 ,
2758
            '.Zip'     => 1 ,
2759
            '.ZIP'     => 1 ,
2760
            '.ear'     => 1 ,  # Java
2761
            '.war'     => 1 ,  # contained within .ear
2762
            );
2763
# 1}}}
2764
} # end sub set_constants()
2765
sub Install_Regexp_Common {                  # {{{1
2766
    # Installs portions of Damian Conway's & Abigail's Regexp::Common
2767
    # module, v2.120, into a temporary directory for the duration of
2768
    # this run.
2769
 
2770
    my %Regexp_Common_Contents = ();
2771
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
2772
package Regexp::Common;
2773
 
2774
use 5.00473;
2775
use strict;
2776
 
2777
local $^W = 1;
2778
 
2779
use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/;
2780
 
2781
($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/;
2782
 
2783
 
2784
sub _croak {
2785
    require Carp;
2786
    goto &Carp::croak;
2787
}
2788
 
2789
sub _carp {
2790
    require Carp;
2791
    goto &Carp::carp;
2792
}
2793
 
2794
sub new {
2795
    my ($class, @data) = @_;
2796
    my %self;
2797
    tie %self, $class, @data;
2798
    return \%self;
2799
}
2800
 
2801
sub TIEHASH {
2802
    my ($class, @data) = @_;
2803
    bless \@data, $class;
2804
}
2805
 
2806
sub FETCH {
2807
    my ($self, $extra) = @_;
2808
    return bless ref($self)->new(@$self, $extra), ref($self);
2809
}
2810
 
2811
# Modification for cloc:  only need a few modules from Regexp::Common.
2812
my %imports = map {$_ => "Regexp::Common::$_"}
2813
              qw /balanced comment delimited /;
2814
#my %imports = map {$_ => "Regexp::Common::$_"}
2815
#              qw /balanced CC     comment   delimited lingua list
2816
#                  net      number profanity SEN       URI    whitespace
2817
#                  zip/;
2818
 
2819
sub import {
2820
    shift;  # Shift off the class.
2821
    tie %RE, __PACKAGE__;
2822
    {
2823
        no strict 'refs';
2824
        *{caller() . "::RE"} = \%RE;
2825
    }
2826
 
2827
    my $saw_import;
2828
    my $no_defaults;
2829
    my %exclude;
2830
    foreach my $entry (grep {!/^RE_/} @_) {
2831
        if ($entry eq 'pattern') {
2832
            no strict 'refs';
2833
            *{caller() . "::pattern"} = \&pattern;
2834
            next;
2835
        }
2836
        # This used to prevent $; from being set. We still recognize it,
2837
        # but we won't do anything.
2838
        if ($entry eq 'clean') {
2839
            next;
2840
        }
2841
        if ($entry eq 'no_defaults') {
2842
            $no_defaults ++;
2843
            next;
2844
        }
2845
        if (my $module = $imports {$entry}) {
2846
            $saw_import ++;
2847
            eval "require $module;";
2848
            die $@ if $@;
2849
            next;
2850
        }
2851
        if ($entry =~ /^!(.*)/ && $imports {$1}) {
2852
            $exclude {$1} ++;
2853
            next;
2854
        }
2855
        # As a last resort, try to load the argument.
2856
        my $module = $entry =~ /^Regexp::Common/
2857
                            ? $entry
2858
                            : "Regexp::Common::" . $entry;
2859
        eval "require $module;";
2860
        die $@ if $@;
2861
    }
2862
 
2863
    unless ($saw_import || $no_defaults) {
2864
        foreach my $module (values %imports) {
2865
            next if $exclude {$module};
2866
            eval "require $module;";
2867
            die $@ if $@;
2868
        }
2869
    }
2870
 
2871
    my %exported;
2872
    foreach my $entry (grep {/^RE_/} @_) {
2873
        if ($entry =~ /^RE_(\w+_)?ALL$/) {
2874
            my $m  = defined $1 ? $1 : "";
2875
            my $re = qr /^RE_${m}.*$/;
2876
            while (my ($sub, $interface) = each %sub_interface) {
2877
                next if $exported {$sub};
2878
                next unless $sub =~ /$re/;
2879
                {
2880
                    no strict 'refs';
2881
                    *{caller() . "::$sub"} = $interface;
2882
                }
2883
                $exported {$sub} ++;
2884
            }
2885
        }
2886
        else {
2887
            next if $exported {$entry};
2888
            _croak "Can't export unknown subroutine &$entry"
2889
                unless $sub_interface {$entry};
2890
            {
2891
                no strict 'refs';
2892
                *{caller() . "::$entry"} = $sub_interface {$entry};
2893
            }
2894
            $exported {$entry} ++;
2895
        }
2896
    }
2897
}
2898
 
2899
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
2900
 
2901
sub DESTROY {}
2902
 
2903
my %cache;
2904
 
2905
my $fpat = qr/^(-\w+)/;
2906
 
2907
sub _decache {
2908
        my @args = @{tied %{$_[0]}};
2909
        my @nonflags = grep {!/$fpat/} @args;
2910
        my $cache = get_cache(@nonflags);
2911
        _croak "Can't create unknown regex: \$RE{"
2912
            . join("}{",@args) . "}"
2913
                unless exists $cache->{__VAL__};
2914
        _croak "Perl $] does not support the pattern "
2915
            . "\$RE{" . join("}{",@args)
2916
            . "}.\nYou need Perl $cache->{__VAL__}{version} or later"
2917
                unless ($cache->{__VAL__}{version}||0) <= $];
2918
        my %flags = ( %{$cache->{__VAL__}{default}},
2919
                      map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
2920
                          : /$fpat/           ? ($1 => undef)
2921
                          :                     ()
2922
                          } @args);
2923
        $cache->{__VAL__}->_clone_with(\@args, \%flags);
2924
}
2925
 
2926
use overload q{""} => \&_decache;
2927
 
2928
 
2929
sub get_cache {
2930
        my $cache = \%cache;
2931
        foreach (@_) {
2932
                $cache = $cache->{$_}
2933
                      || ($cache->{$_} = {});
2934
        }
2935
        return $cache;
2936
}
2937
 
2938
sub croak_version {
2939
        my ($entry, @args) = @_;
2940
}
2941
 
2942
sub pattern {
2943
        my %spec = @_;
2944
        _croak 'pattern() requires argument: name => [ @list ]'
2945
                unless $spec{name} && ref $spec{name} eq 'ARRAY';
2946
        _croak 'pattern() requires argument: create => $sub_ref_or_string'
2947
                unless $spec{create};
2948
 
2949
        if (ref $spec{create} ne "CODE") {
2950
                my $fixed_str = "$spec{create}";
2951
                $spec{create} = sub { $fixed_str }
2952
        }
2953
 
2954
        my @nonflags;
2955
        my %default;
2956
        foreach ( @{$spec{name}} ) {
2957
                if (/$fpat=(.*)/) {
2958
                        $default{$1} = $2;
2959
                }
2960
                elsif (/$fpat\s*$/) {
2961
                        $default{$1} = undef;
2962
                }
2963
                else {
2964
                        push @nonflags, $_;
2965
                }
2966
        }
2967
 
2968
        my $entry = get_cache(@nonflags);
2969
 
2970
        if ($entry->{__VAL__}) {
2971
                _carp "Overriding \$RE{"
2972
                   . join("}{",@nonflags)
2973
                   . "}";
2974
        }
2975
 
2976
        $entry->{__VAL__} = bless {
2977
                                create  => $spec{create},
2978
                                match   => $spec{match} || \&generic_match,
2979
                                subs    => $spec{subs}  || \&generic_subs,
2980
                                version => $spec{version},
2981
                                default => \%default,
2982
                            }, 'Regexp::Common::Entry';
2983
 
2984
        foreach (@nonflags) {s/\W/X/g}
2985
        my $subname = "RE_" . join ("_", @nonflags);
2986
        $sub_interface{$subname} = sub {
2987
                push @_ => undef if @_ % 2;
2988
                my %flags = @_;
2989
                my $pat = $spec{create}->($entry->{__VAL__},
2990
                               {%default, %flags}, \@nonflags);
2991
                if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
2992
                else { $pat =~ s/\Q(?k:/(?:/g; }
2993
                return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
2994
        };
2995
 
2996
        return 1;
2997
}
2998
 
2999
sub generic_match {$_ [1] =~  /$_[0]/}
3000
sub generic_subs  {$_ [1] =~ s/$_[0]/$_[2]/}
3001
 
3002
sub matches {
3003
        my ($self, $str) = @_;
3004
        my $entry = $self -> _decache;
3005
        $entry -> {match} -> ($entry, $str);
3006
}
3007
 
3008
sub subs {
3009
        my ($self, $str, $newstr) = @_;
3010
        my $entry = $self -> _decache;
3011
        $entry -> {subs} -> ($entry, $str, $newstr);
3012
        return $str;
3013
}
3014
 
3015
 
3016
package Regexp::Common::Entry;
3017
# use Carp;
3018
 
3019
local $^W = 1;
3020
 
3021
use overload
3022
    q{""} => sub {
3023
        my ($self) = @_;
3024
        my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
3025
        if (exists $self->{flags}{-keep}) {
3026
            $pat =~ s/\Q(?k:/(/g;
3027
        }
3028
        else {
3029
            $pat =~ s/\Q(?k:/(?:/g;
3030
        }
3031
        if (exists $self->{flags}{-i})   { $pat = "(?i)$pat" }
3032
        return $pat;
3033
    };
3034
 
3035
sub _clone_with {
3036
    my ($self, $args, $flags) = @_;
3037
    bless { %$self, args=>$args, flags=>$flags }, ref $self;
3038
}
3039
# 
3040
#    Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights
3041
#  Reserved. This module is free software. It may be used, redistributed
3042
#      and/or modified under the terms of the Perl Artistic License
3043
#            (see http://www.perl.com/perl/misc/Artistic.html)
3044
EOCommon
3045
# 2}}}
3046
$Regexp_Common_Contents{'Common/comment'} = <<'EOC';   # {{{2
3047
# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $
3048
 
3049
package Regexp::Common::comment;
3050
 
3051
use strict;
3052
local $^W = 1;
3053
 
3054
use Regexp::Common qw /pattern clean no_defaults/;
3055
use vars qw /$VERSION/;
3056
 
3057
($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g;
3058
 
3059
my @generic = (
3060
    {languages => [qw /ABC Forth/],
3061
     to_eol    => ['\\\\']},   # This is for just a *single* backslash.
3062
 
3063
    {languages => [qw /Ada Alan Eiffel lua/],
3064
     to_eol    => ['--']},
3065
 
3066
    {languages => [qw /Advisor/],
3067
     to_eol    => ['#|//']},
3068
 
3069
    {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
3070
                       SMITH zonefile/],
3071
     to_eol    => [';']},
3072
 
3073
    {languages => ['Algol 60'],
3074
     from_to   => [[qw /comment ;/]]},
3075
 
3076
    {languages => [qw {ALPACA B C C-- LPC PL/I}],
3077
     from_to   => [[qw {/* */}]]},
3078
 
3079
    {languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/],
3080
     to_eol    => ['#']},
3081
 
3082
    {languages => [[BASIC => 'mvEnterprise']],
3083
     to_eol    => ['[*!]|REM']},
3084
 
3085
    {languages => [qw /Befunge-98 Funge-98 Shelta/],
3086
     id        => [';']},
3087
 
3088
    {languages => ['beta-Juliet', 'Crystal Report', 'Portia'],
3089
     to_eol    => ['//']},
3090
 
3091
    {languages => ['BML'],
3092
     from_to   => [['<?_c', '_c?>']],
3093
    },
3094
 
3095
    {languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/],
3096
     to_eol    => ['//'],
3097
     from_to   => [[qw {/* */}]]},
3098
 
3099
    {languages => [qw /CLU LaTeX slrn TeX/],
3100
     to_eol    => ['%']},
3101
 
3102
    {languages => [qw /False/],
3103
     from_to   => [[qw !{ }!]]},
3104
 
3105
    {languages => [qw /Fortran/],
3106
     to_eol    => ['!']},
3107
 
3108
    {languages => [qw /Haifu/],
3109
     id        => [',']},
3110
 
3111
    {languages => [qw /ILLGOL/],
3112
     to_eol    => ['NB']},
3113
 
3114
    {languages => [qw /INTERCAL/],
3115
     to_eol    => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
3116
 
3117
    {languages => [qw /J/],
3118
     to_eol    => ['NB[.]']},
3119
 
3120
    {languages => [qw /Nickle/],
3121
     to_eol    => ['#'],
3122
     from_to   => [[qw {/* */}]]},
3123
 
3124
    {languages => [qw /Oberon/],
3125
     from_to   => [[qw /(* *)/]]},
3126
 
3127
    {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
3128
     to_eol    => ['//'],
3129
     from_to   => [[qw !{ }!], [qw !(* *)!]]},
3130
 
3131
    {languages => [[qw /Pascal Workshop/]],
3132
     id        => [qw /"/],
3133
     from_to   => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
3134
 
3135
    {languages => [qw /PEARL/],
3136
     to_eol    => ['!'],
3137
     from_to   => [[qw {/* */}]]},
3138
 
3139
    {languages => [qw /PHP/],
3140
     to_eol    => ['#', '//'],
3141
     from_to   => [[qw {/* */}]]},
3142
 
3143
    {languages => [qw !PL/B!],
3144
     to_eol    => ['[.;]']},
3145
 
3146
    {languages => [qw !PL/SQL!],
3147
     to_eol    => ['--'],
3148
     from_to   => [[qw {/* */}]]},
3149
 
3150
    {languages => [qw /Q-BAL/],
3151
     to_eol    => ['`']},
3152
 
3153
    {languages => [qw /Smalltalk/],
3154
     id        => ['"']},
3155
 
3156
    {languages => [qw /SQL/],
3157
     to_eol    => ['-{2,}']},
3158
 
3159
    {languages => [qw /troff/],
3160
     to_eol    => ['\\\"']},
3161
 
3162
    {languages => [qw /vi/],
3163
     to_eol    => ['"']},
3164
 
3165
    {languages => [qw /*W/],
3166
     from_to   => [[qw {|| !!}]]},
3167
);
3168
 
3169
my @plain_or_nested = (
3170
   [Caml         =>  undef,       "(*"  => "*)"],
3171
   [Dylan        =>  "//",        "/*"  => "*/"],
3172
   [Haskell      =>  "-{2,}",     "{-"  => "-}"],
3173
   [Hugo         =>  "!(?!\\\\)", "!\\" => "\\!"],
3174
   [SLIDE        =>  "#",         "(*"  => "*)"],
3175
);
3176
 
3177
#
3178
# Helper subs.
3179
#
3180
 
3181
sub combine      {
3182
    local $_ = join "|", @_;
3183
    if (@_ > 1) {
3184
        s/\(\?k:/(?:/g;
3185
        $_ = "(?k:$_)";
3186
    }
3187
    $_
3188
}
3189
 
3190
sub to_eol  ($)  {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
3191
sub id      ($)  {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"}  # One char only!
3192
sub from_to      {
3193
    local $^W = 1;
3194
    my ($begin, $end) = @_;
3195
 
3196
    my $qb  = quotemeta $begin;
3197
    my $qe  = quotemeta $end;
3198
    my $fe  = quotemeta substr $end   => 0, 1;
3199
    my $te  = quotemeta substr $end   => 1;
3200
 
3201
    "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
3202
}
3203
 
3204
 
3205
my $count = 0;
3206
sub nested {
3207
    local $^W = 1;
3208
    my ($begin, $end) = @_;
3209
 
3210
    $count ++;
3211
    my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
3212
 
3213
    my $qb  = quotemeta $begin;
3214
    my $qe  = quotemeta $end;
3215
    my $fb  = quotemeta substr $begin => 0, 1;
3216
    my $fe  = quotemeta substr $end   => 0, 1;
3217
 
3218
    my $tb  = quotemeta substr $begin => 1;
3219
    my $te  = quotemeta substr $end   => 1;
3220
 
3221
    use re 'eval';
3222
 
3223
    my $re;
3224
    if ($fb eq $fe) {
3225
        $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
3226
    }
3227
    else {
3228
        local $"      =  "|";
3229
        my   @clauses =  "(?>[^$fb$fe]+)";
3230
        push @clauses => "$fb(?!$tb)" if length $tb;
3231
        push @clauses => "$fe(?!$te)" if length $te;
3232
        push @clauses =>  $r;
3233
        $re           =   qr /(?:$qb(?:@clauses)*$qe)/;
3234
    }
3235
 
3236
    $Regexp::Common::comment [$count] = qr/$re/;
3237
}
3238
 
3239
#
3240
# Process data.
3241
#
3242
 
3243
foreach my $info (@plain_or_nested) {
3244
    my ($language, $mark, $begin, $end) = @$info;
3245
    pattern name    => [comment => $language],
3246
            create  =>
3247
                sub {my $re     = nested $begin => $end;
3248
                     my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
3249
                     exists $_ [1] -> {-keep} ? qr /($prefix$re)/
3250
                                              : qr  /$prefix$re/
3251
                },
3252
            version => 5.006,
3253
            ;
3254
}
3255
 
3256
 
3257
foreach my $group (@generic) {
3258
    my $pattern = combine +(map {to_eol   $_} @{$group -> {to_eol}}),
3259
                           (map {from_to @$_} @{$group -> {from_to}}),
3260
                           (map {id       $_} @{$group -> {id}}),
3261
                  ;
3262
    foreach my $language  (@{$group -> {languages}}) {
3263
        pattern name    => [comment => ref $language ? @$language : $language],
3264
                create  => $pattern,
3265
                ;
3266
    }
3267
}
3268
 
3269
 
3270
 
3271
#
3272
# Other languages.
3273
#
3274
 
3275
# http://www.pascal-central.com/docs/iso10206.txt
3276
pattern name    => [qw /comment Pascal/],
3277
        create  => '(?k:' . '(?k:[{]|[(][*])'
3278
                          . '(?k:[^}*]*(?:[*][^)][^}*]*)*)'
3279
                          . '(?k:[}]|[*][)])'
3280
                          . ')'
3281
        ;
3282
 
3283
# http://www.templetons.com/brad/alice/language/
3284
pattern name    =>  [qw /comment Pascal Alice/],
3285
        create  =>  '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
3286
        ;
3287
 
3288
 
3289
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
3290
pattern name    => [qw (comment), 'Algol 68'],
3291
        create  => q {(?k:(?:#[^#]*#)|}                           .
3292
                   q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
3293
                   q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
3294
        ;
3295
 
3296
 
3297
# See rules 91 and 92 of ISO 8879 (SGML).
3298
# Charles F. Goldfarb: "The SGML Handbook".
3299
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
3300
# Ch. 10.3, pp 390.
3301
pattern name    => [qw (comment HTML)],
3302
        create  => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
3303
        ;
3304
 
3305
 
3306
pattern name    => [qw /comment SQL MySQL/],
3307
        create  => q {(?k:(?:#|-- )[^\n]*\n|} .
3308
                   q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
3309
        ;
3310
 
3311
# Anything that isn't <>[]+-.,
3312
# http://home.wxs.nl/~faase009/Ha_BF.html
3313
pattern name    => [qw /comment Brainfuck/],
3314
        create  => '(?k:[^<>\[\]+\-.,]+)'
3315
        ;
3316
 
3317
# Squeak is a variant of Smalltalk-80.
3318
# http://www.squeak.
3319
# http://mucow.com/squeak-qref.html
3320
pattern name    => [qw /comment Squeak/],
3321
        create  => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
3322
        ;
3323
 
3324
#
3325
# Scores of less than 5 or above 17....
3326
# http://www.cliff.biffle.org/esoterica/beatnik.html
3327
@Regexp::Common::comment::scores = (1,  3,  3,  2,  1,  4,  2,  4,  1,  8,
3328
                                    5,  1,  3,  1,  1,  3, 10,  1,  1,  1,
3329
                                    1,  4,  4,  8,  4, 10);
3330
pattern name    =>  [qw /comment Beatnik/],
3331
        create  =>  sub {
3332
            use re 'eval';
3333
            my ($s, $x);
3334
            my $re = qr {\b([A-Za-z]+)\b
3335
                         (?(?{($s, $x) = (0, lc $^N);
3336
                              $s += $Regexp::Common::comment::scores
3337
                                    [ord (chop $x) - ord ('a')] while length $x;
3338
                              $s  >= 5 && $s < 18})XXX|)}x;
3339
            $re;
3340
        },
3341
        version  => 5.008,
3342
        ;
3343
 
3344
 
3345
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
3346
#  (Goto table of contents/3.3 Source Form)
3347
# Fortran, in fixed format. Comments start with a C, c or * in the first
3348
# column, or a ! anywhere, but the sixth column. Then end with a newline.
3349
pattern name    =>  [qw /comment Fortran fixed/],
3350
        create  =>  '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
3351
        ;
3352
 
3353
 
3354
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
3355
# Traditionally, comments in COBOL were indicated with an asteriks in
3356
# the seventh column. Modern compilers may be more lenient.
3357
pattern name    =>  [qw /comment COBOL/],
3358
        create  =>  '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
3359
        version =>  '5.008',
3360
        ;
3361
 
3362
1;
3363
#
3364
#    Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
3365
#      This module is free software. It may be used, redistributed
3366
#     and/or modified under the terms of the Perl Artistic License
3367
#           (see http://www.perl.com/perl/misc/Artistic.html)
3368
EOC
3369
# 2}}}
3370
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB';   # {{{2
3371
package Regexp::Common::balanced; {
3372
 
3373
use strict;
3374
local $^W = 1;
3375
 
3376
use vars qw /$VERSION/;
3377
($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g;
3378
 
3379
use Regexp::Common qw /pattern clean no_defaults/;
3380
 
3381
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
3382
my $count = -1;
3383
my %cache;
3384
 
3385
sub nested {
3386
    local $^W = 1;
3387
    my ($start, $finish) = @_;
3388
 
3389
    return $Regexp::Common::balanced [$cache {$start} {$finish}]
3390
            if exists $cache {$start} {$finish};
3391
 
3392
    $count ++;
3393
    my $r = '(??{$Regexp::Common::balanced ['. $count . ']})';
3394
 
3395
    my @starts   = map {s/\\(.)/$1/g; $_} grep {length}
3396
                        $start  =~ /([^|\\]+|\\.)+/gs;
3397
    my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
3398
                        $finish =~ /([^|\\]+|\\.)+/gs;
3399
 
3400
    push @finishes => ($finishes [-1]) x (@starts - @finishes);
3401
 
3402
    my @re;
3403
    local $" = "|";
3404
    foreach my $begin (@starts) {
3405
        my $end = shift @finishes;
3406
 
3407
        my $qb  = quotemeta $begin;
3408
        my $qe  = quotemeta $end;
3409
        my $fb  = quotemeta substr $begin => 0, 1;
3410
        my $fe  = quotemeta substr $end   => 0, 1;
3411
 
3412
        my $tb  = quotemeta substr $begin => 1;
3413
        my $te  = quotemeta substr $end   => 1;
3414
 
3415
        use re 'eval';
3416
 
3417
        my $add;
3418
        if ($fb eq $fe) {
3419
            push @re =>
3420
                   qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
3421
        }
3422
        else {
3423
            my   @clauses =  "(?>[^$fb$fe]+)";
3424
            push @clauses => "$fb(?!$tb)" if length $tb;
3425
            push @clauses => "$fe(?!$te)" if length $te;
3426
            push @clauses =>  $r;
3427
            push @re      =>  qr /(?:$qb(?:@clauses)*$qe)/;
3428
        }
3429
    }
3430
 
3431
    $cache {$start} {$finish} = $count;
3432
    $Regexp::Common::balanced [$count] = qr/@re/;
3433
}
3434
 
3435
 
3436
pattern name    => [qw /balanced -parens=() -begin= -end=/],
3437
        create  => sub {
3438
            my $flag = $_[1];
3439
            unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
3440
                    defined $flag -> {-end}   && length $flag -> {-end}) {
3441
                my @open  = grep {index ($flag->{-parens}, $_) >= 0}
3442
                             ('[','(','{','<');
3443
                my @close = map {$closer {$_}} @open;
3444
                $flag -> {-begin} = join "|" => @open;
3445
                $flag -> {-end}   = join "|" => @close;
3446
            }
3447
            my $pat = nested @$flag {qw /-begin -end/};
3448
            return exists $flag -> {-keep} ? qr /($pat)/ : $pat;
3449
        },
3450
        version => 5.006,
3451
        ;
3452
 
3453
}
3454
 
3455
1;
3456
#
3457
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
3458
#       This module is free software. It may be used, redistributed
3459
#      and/or modified under the terms of the Perl Artistic License
3460
#            (see http://www.perl.com/perl/misc/Artistic.html)
3461
EOB
3462
# 2}}}
3463
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD';   # {{{2
3464
# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $
3465
 
3466
package Regexp::Common::delimited;
3467
 
3468
use strict;
3469
local $^W = 1;
3470
 
3471
use Regexp::Common qw /pattern clean no_defaults/;
3472
use vars qw /$VERSION/;
3473
 
3474
($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g;
3475
 
3476
sub gen_delimited {
3477
 
3478
    my ($dels, $escs) = @_;
3479
    # return '(?:\S*)' unless $dels =~ /\S/;
3480
    if (length $escs) {
3481
        $escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
3482
    }
3483
    my @pat = ();
3484
    my $i;
3485
    for ($i=0; $i < length $dels; $i++) {
3486
        my $del = quotemeta substr ($dels, $i, 1);
3487
        my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : "";
3488
        if ($del eq $esc) {
3489
            push @pat,
3490
                 "(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)";
3491
        }
3492
        elsif (length $esc) {
3493
            push @pat,
3494
                 "(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)";
3495
        }
3496
        else {
3497
            push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)";
3498
        }
3499
    }
3500
    my $pat = join '|', @pat;
3501
    return "(?k:$pat)";
3502
}
3503
 
3504
sub _croak {
3505
    require Carp;
3506
    goto &Carp::croak;
3507
}
3508
 
3509
pattern name   => [qw( delimited -delim= -esc=\\ )],
3510
        create => sub {my $flags = $_[1];
3511
                       _croak 'Must specify delimiter in $RE{delimited}'
3512
                             unless length $flags->{-delim};
3513
                       return gen_delimited (@{$flags}{-delim, -esc});
3514
                  },
3515
        ;
3516
 
3517
pattern name   => [qw( quoted -esc=\\ )],
3518
        create => sub {my $flags = $_[1];
3519
                       return gen_delimited (q{"'`}, $flags -> {-esc});
3520
                  },
3521
        ;
3522
 
3523
 
3524
1;
3525
#
3526
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
3527
#       This module is free software. It may be used, redistributed
3528
#      and/or modified under the terms of the Perl Artistic License
3529
#            (see http://www.perl.com/perl/misc/Artistic.html)
3530
EOD
3531
# 2}}}
3532
    my $problems        = 0;
3533
    $HAVE_Rexexp_Common = 0;
3534
    my $dir             = "";
3535
    if ($opt_sdir) {
3536
        # write to the user-defined scratch directory
3537
        $dir = $opt_sdir;
3538
    } else {
3539
        # let File::Temp create a suitable temporary directory
3540
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
3541
    }
3542
    print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
3543
    my $Regexp_dir        = "$dir/Regexp";
3544
    my $Regexp_Common_dir = "$dir/Regexp/Common";
3545
    mkdir $Regexp_dir       ;
3546
    mkdir $Regexp_Common_dir;
3547
 
3548
    foreach my $module_file (keys %Regexp_Common_Contents) {
3549
        my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w";
3550
        if (defined $OUT) {
3551
            print $OUT $Regexp_Common_Contents{$module_file};
3552
            $OUT->close;
3553
        } else {
3554
            warn "Failed to install Regexp::${module_file}.pm\n";
3555
            $problems = 1;
3556
        }
3557
    }
3558
 
3559
    push @INC, $dir;
3560
    eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
3561
    $HAVE_Rexexp_Common = 1 unless $problems;
3562
} # 1}}}
3563
sub call_regexp_common {                     # {{{1
3564
    my ($ra_lines, $language ) = @_;
3565
    print "-> call_regexp_common\n" if $opt_v > 2;
3566
 
3567
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
3568
 
3569
    my $all_lines = join("", @{$ra_lines});
3570
 
3571
    no strict 'vars';
3572
    # otherwise get:
3573
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
3574
    if ($all_lines =~ $RE{comment}{$language}) {
3575
        # Suppress "Use of uninitialized value in regexp compilation" that
3576
        # pops up when $1 is undefined--happens if there's a bug in the $RE
3577
        # This Pascal comment will trigger it:
3578
        #         (* This is { another } test. **)
3579
        # Curiously, testing for "defined $1" breaks the substitution.
3580
        no warnings; 
3581
 
3582
        $all_lines =~ s/$1//g;
3583
    }
3584
    # a bogus use of %RE to avoid:
3585
    # Name "main::RE" used only once: possible typo at cloc line xx.
3586
    print scalar keys %RE if $opt_v < -20;
3587
 
3588
    print "<- call_regexp_common\n" if $opt_v > 2;
3589
    return split("\n", $all_lines);
3590
} # 1}}}
3591
sub plural_form {                            # {{{1
3592
    # For getting the right plural form on some English nouns.
3593
    my $n = shift @_;
3594
    if ($n == 1) { return ( 1, "" ); }
3595
    else         { return ($n, "s"); }
3596
} # 1}}}
3597
sub matlab_or_objective_C {                  # {{{1
3598
    # Decide if code is MATLAB, Objective C, or MUMPS
3599
    my ($file        , # in
3600
        $rh_Err      , # in   hash of error codes
3601
        $raa_errors  , # out
3602
        $rs_language , # out
3603
       ) = @_;
3604
 
3605
    print "-> matlab_or_objective_C\n" if $opt_v > 2;
3606
    # matlab markers:
3607
    #   first line starts with "function"
3608
    #   some lines start with "%"
3609
    #   high marks for lines that start with [
3610
    #
3611
    # Objective C markers:
3612
    #   must have at least two brace characters, { }
3613
    #   has /* ... */ style comments
3614
    #   some lines start with @
3615
    #   some lines start with #include
3616
    #
3617
    # MUMPS:
3618
    #   has ; comment markers
3619
    #   do not match:  \w+\s*=\s*\w
3620
    #   lines begin with   \s*\.?\w+\s+\w
3621
    #   high marks for lines that start with \s*K\s+ or \s*Kill\s+
3622
 
3623
    ${$rs_language} = "";
3624
    my $IN = new IO::File $file, "r";
3625
    if (!defined $IN) {
3626
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
3627
        return;
3628
    }
3629
 
3630
    my $matlab_points      = 0;
3631
    my $objective_C_points = 0;
3632
    my $mumps_points       = 0;
3633
    my $has_braces         = 0;
3634
    while (<$IN>) {
3635
        ++$has_braces if m/[{}]/;
3636
        ++$mumps_points if $. == 1 and m{^[A-Z]};
3637
        if      (m{^\s*/\*}) {           #   /*
3638
            ++$objective_C_points;
3639
            --$matlab_points;
3640
        } elsif (m{^\s*\w+\s*=\s*}) {    # definitely not MUMPS
3641
            --$mumps_points;
3642
        } elsif (m{^\s*\.?\w\s+\w}) {
3643
            ++$mumps_points;
3644
        } elsif (m{^\s*;}) {
3645
            ++$mumps_points;
3646
        } elsif (m{^\s*#include}) {
3647
            ++$objective_C_points;
3648
            --$matlab_points;
3649
        } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
3650
            # Objective C without a doubt
3651
            $objective_C_points = 1;
3652
            $matlab_points      = 0;
3653
            last;
3654
        } elsif (m{^\s*\[}) {             #   line starts with [  -- very matlab
3655
            $matlab_points += 5;
3656
        } elsif (m{^\sK(ill)?\s+}) {
3657
            $mumps_points  += 5;
3658
        } elsif (m{^\s*function}) {
3659
            --$objective_C_points;
3660
            ++$matlab_points;
3661
        } elsif (m{^\s*%}) {              #   %
3662
            --$objective_C_points;
3663
            ++$matlab_points;
3664
            ++$mumps_points;
3665
        }
3666
    }
3667
    $IN->close;
3668
 
3669
    print "<- matlab_or_objective_C(matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points)\n"
3670
        if $opt_v > 2;
3671
    $objective_C_points = -9.9e20 unless $has_braces >= 2;
3672
    if      (($matlab_points > $objective_C_points) and
3673
             ($matlab_points > $mumps_points)      ) {
3674
        ${$rs_language} = "MATLAB";
3675
    } elsif (($mumps_points > $objective_C_points) and
3676
             ($mumps_points > $matlab_points)      ) {
3677
        ${$rs_language} = "MUMPS";
3678
    } else {
3679
        ${$rs_language} = "Objective C";
3680
    }
3681
 
3682
} # 1}}}
3683
 
3684
# subroutines copied from SLOCCount
3685
my %lex_files    = ();  # really_is_lex()
3686
my %expect_files = ();  # really_is_expect()
3687
my %pascal_files = ();  # really_is_pascal(), really_is_incpascal()
3688
my %php_files    = ();  # really_is_php()
3689
sub really_is_lex {                          # {{{1
3690
# Given filename, returns TRUE if its contents really is lex.
3691
# lex file must have "%%", "%{", and "%}".
3692
# In theory, a lex file doesn't need "%{" and "%}", but in practice
3693
# they all have them, and requiring them avoid mislabeling a
3694
# non-lexfile as a lex file.
3695
 
3696
 my $filename = shift;
3697
 chomp($filename);
3698
 
3699
 my $is_lex = 0;      # Value to determine.
3700
 my $percent_percent = 0;
3701
 my $percent_opencurly = 0;
3702
 my $percent_closecurly = 0;
3703
 
3704
 # Return cached result, if available:
3705
 if ($lex_files{$filename}) { return $lex_files{$filename};}
3706
 
3707
 open(LEX_FILE, "<$filename") ||
3708
      die "Can't open $filename to determine if it's lex.\n";
3709
 while(<LEX_FILE>) {
3710
   $percent_percent++     if (m/^\s*\%\%/);
3711
   $percent_opencurly++   if (m/^\s*\%\{/);
3712
   $percent_closecurly++   if (m/^\s*\%\}/);
3713
 }
3714
 close(LEX_FILE);
3715
 
3716
 if ($percent_percent && $percent_opencurly && $percent_closecurly)
3717
          {$is_lex = 1;}
3718
 
3719
 $lex_files{$filename} = $is_lex; # Store result in cache.
3720
 
3721
 return $is_lex;
3722
} # 1}}}
3723
sub really_is_expect {                       # {{{1
3724
# Given filename, returns TRUE if its contents really are Expect.
3725
# Many "exp" files (such as in Apache and Mesa) are just "export" data,
3726
# summarizing something else # (e.g., its interface).
3727
# Sometimes (like in RPM) it's just misc. data.
3728
# Thus, we need to look at the file to determine
3729
# if it's really an "expect" file.
3730
 
3731
 my $filename = shift;
3732
 chomp($filename);
3733
 
3734
# The heuristic is as follows: it's Expect _IF_ it:
3735
# 1. has "load_lib" command and either "#" comments or {}.
3736
# 2. {, }, and one of: proc, if, [...], expect
3737
 
3738
 my $is_expect = 0;      # Value to determine.
3739
 
3740
 my $begin_brace = 0;  # Lines that begin with curly braces.
3741
 my $end_brace = 0;    # Lines that begin with curly braces.
3742
 my $load_lib = 0;     # Lines with the Load_lib command.
3743
 my $found_proc = 0;
3744
 my $found_if = 0;
3745
 my $found_brackets = 0;
3746
 my $found_expect = 0;
3747
 my $found_pound = 0;
3748
 
3749
 # Return cached result, if available:
3750
 if ($expect_files{$filename}) { return expect_files{$filename};}
3751
 
3752
 open(EXPECT_FILE, "<$filename") ||
3753
      die "Can't open $filename to determine if it's expect.\n";
3754
 while(<EXPECT_FILE>) {
3755
 
3756
   if (m/#/) {$found_pound++; s/#.*//;}
3757
   if (m/^\s*\{/) { $begin_brace++;}
3758
   if (m/\{\s*$/) { $begin_brace++;}
3759
   if (m/^\s*\}/) { $end_brace++;}
3760
   if (m/\};?\s*$/) { $end_brace++;}
3761
   if (m/^\s*load_lib\s+\S/) { $load_lib++;}
3762
   if (m/^\s*proc\s/) { $found_proc++;}
3763
   if (m/^\s*if\s/) { $found_if++;}
3764
   if (m/\[.*\]/) { $found_brackets++;}
3765
   if (m/^\s*expect\s/) { $found_expect++;}
3766
 }
3767
 close(EXPECT_FILE);
3768
 
3769
 if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
3770
          {$is_expect = 1;}
3771
 if ( $begin_brace && $end_brace &&
3772
      ($found_proc || $found_if || $found_brackets || $found_expect))
3773
          {$is_expect = 1;}
3774
 
3775
 $expect_files{$filename} = $is_expect; # Store result in cache.
3776
 
3777
 return $is_expect;
3778
} # 1}}}
3779
sub really_is_pascal {                       # {{{1
3780
# Given filename, returns TRUE if its contents really are Pascal.
3781
 
3782
# This isn't as obvious as it seems.
3783
# Many ".p" files are Perl files
3784
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
3785
# others are C extractions
3786
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
3787
# and some files in linuxconf).
3788
# However, test files in "p2c" really are Pascal, for example.
3789
 
3790
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
3791
# is actually C code.  The heuristics determine that they're not Pascal,
3792
# but because it ends in ".p" it's not counted as C code either.
3793
# I believe this is actually correct behavior, because frankly it
3794
# looks like it's automatically generated (it's a bitmap expressed as code).
3795
# Rather than guess otherwise, we don't include it in a list of
3796
# source files.  Let's face it, someone who creates C files ending in ".p"
3797
# and expects them to be counted by default as C files in SLOCCount needs
3798
# their head examined.  I suggest examining their head
3799
# with a sucker rod (see syslogd(8) for more on sucker rods).
3800
 
3801
# This heuristic counts as Pascal such files such as:
3802
#  /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
3803
# Which is hand-generated.  We don't count woven documents now anyway,
3804
# so this is justifiable.
3805
 
3806
 my $filename = shift;
3807
 chomp($filename);
3808
 
3809
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
3810
# (ignoring {...} and (*...*) comments):
3811
# 1. "^..program NAME" or "^..unit NAME",
3812
# 2. "procedure", "function", "^..interface", or "^..implementation",
3813
# 3. a "begin", and
3814
# 4. it ends with "end.",
3815
#
3816
# Or it has all of the following:
3817
# 1. "^..module NAME" and
3818
# 2. it ends with "end.".
3819
#
3820
# Or it has all of the following:
3821
# 1. "^..program NAME",
3822
# 2. a "begin", and
3823
# 3. it ends with "end.".
3824
#
3825
# The "end." requirements in particular filter out non-Pascal.
3826
#
3827
# Note (jgb): this does not detect Pascal main files in fpc, like
3828
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
3829
# it
3830
 
3831
 my $is_pascal = 0;      # Value to determine.
3832
 
3833
 my $has_program = 0;
3834
 my $has_unit = 0;
3835
 my $has_module = 0;
3836
 my $has_procedure_or_function = 0;
3837
 my $found_begin = 0;
3838
 my $found_terminating_end = 0;
3839
 my $has_begin = 0;
3840
 
3841
 # Return cached result, if available:
3842
 if ($pascal_files{$filename}) { return pascal_files{$filename};}
3843
 
3844
 open(PASCAL_FILE, "<$filename") ||
3845
      die "Can't open $filename to determine if it's pascal.\n";
3846
 while(<PASCAL_FILE>) {
3847
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
3848
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
3849
   if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
3850
   if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
3851
   if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
3852
   if (m/\bprocedure\b/i)         { $has_procedure_or_function = 1; }
3853
   if (m/\bfunction\b/i)          { $has_procedure_or_function = 1; }
3854
   if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
3855
   if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
3856
   if (m/\bbegin\b/i) { $has_begin = 1; }
3857
   # Originally I said:
3858
   # "This heuristic fails if there are multi-line comments after
3859
   # "end."; I haven't seen that in real Pascal programs:"
3860
   # But jgb found there are a good quantity of them in Debian, specially in 
3861
   # fpc (at the end of a lot of files there is a multiline comment
3862
   # with the changelog for the file).
3863
   # Therefore, assume Pascal if "end." appears anywhere in the file.
3864
   if (m/end\.\s*$/i) {$found_terminating_end = 1;}
3865
#   elsif (m/\S/) {$found_terminating_end = 0;}
3866
 }
3867
 close(PASCAL_FILE);
3868
 
3869
 # Okay, we've examined the entire file looking for clues;
3870
 # let's use those clues to determine if it's really Pascal:
3871
 
3872
 if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
3873
     $has_begin && $found_terminating_end ) ||
3874
      ( $has_module && $found_terminating_end ) ||
3875
      ( $has_program && $has_begin && $found_terminating_end ) )
3876
          {$is_pascal = 1;}
3877
 
3878
 $pascal_files{$filename} = $is_pascal; # Store result in cache.
3879
 
3880
 return $is_pascal;
3881
} # 1}}}
3882
sub really_is_incpascal {                    # {{{1
3883
# Given filename, returns TRUE if its contents really are Pascal.
3884
# For .inc files (mainly seen in fpc)
3885
 
3886
 my $filename = shift;
3887
 chomp($filename);
3888
 
3889
# The heuristic is as follows: it is Pacal if any of the following:
3890
# 1. really_is_pascal returns true
3891
# 2. Any usual reserverd word is found (program, unit, const, begin...)
3892
 
3893
 # If the general routine for Pascal files works, we have it
3894
 if (&really_is_pascal ($filename)) { 
3895
   $pascal_files{$filename} = 1;
3896
   return 1;
3897
 }
3898
 
3899
 my $is_pascal = 0;      # Value to determine.
3900
 my $found_begin = 0;
3901
 
3902
 open(PASCAL_FILE, "<$filename") ||
3903
      die "Can't open $filename to determine if it's pascal.\n";
3904
 while(<PASCAL_FILE>) {
3905
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
3906
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
3907
   if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
3908
   if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
3909
   if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
3910
   if (m/\bprocedure\b/i)         {$is_pascal = 1; }
3911
   if (m/\bfunction\b/i)          {$is_pascal = 1; }
3912
   if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
3913
   if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
3914
   if (m/\bconstant\s+/i)         {$is_pascal=1;}
3915
   if (m/\bbegin\b/i) { $found_begin = 1; }
3916
   if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
3917
   if ($is_pascal) {
3918
     last;
3919
   }
3920
 }
3921
 
3922
 close(PASCAL_FILE);
3923
 $pascal_files{$filename} = $is_pascal; # Store result in cache.
3924
 return $is_pascal;
3925
} # 1}}}
3926
sub really_is_php {                          # {{{1
3927
# Given filename, returns TRUE if its contents really is php.
3928
 
3929
 my $filename = shift;
3930
 chomp($filename);
3931
 
3932
 my $is_php = 0;      # Value to determine.
3933
 # Need to find a matching pair of surrounds, with ending after beginning:
3934
 my $normal_surround = 0;  # <?; bit 0 = <?, bit 1 = ?>
3935
 my $script_surround = 0;  # <script..>; bit 0 = <script language="php">
3936
 my $asp_surround = 0;     # <%; bit 0 = <%, bit 1 = %>
3937
 
3938
 # Return cached result, if available:
3939
 if ($php_files{$filename}) { return $php_files{$filename};}
3940
 
3941
 open(PHP_FILE, "<$filename") ||
3942
      die "Can't open $filename to determine if it's php.\n";
3943
 while(<PHP_FILE>) {
3944
   if (m/\<\?/)                           { $normal_surround |= 1; }
3945
   if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
3946
   if (m/\<script.*language="?php"?/i)    { $script_surround |= 1; }
3947
   if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
3948
   if (m/\<\%/)                           { $asp_surround |= 1; }
3949
   if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
3950
 }
3951
 close(PHP_FILE);
3952
 
3953
 if ( ($normal_surround == 3) || ($script_surround == 3) ||
3954
      ($asp_surround == 3)) {
3955
   $is_php = 1;
3956
 }
3957
 
3958
 $php_files{$filename} = $is_php; # Store result in cache.
3959
 
3960
 return $is_php;
3961
} # 1}}}
3962
__END__
3963
mode values (stat $item)[2]
3964
       Unix    Windows
3965
file:  33188   33206
3966
dir :  16832   16895
3967
link:  33261   33206
3968
pipe:   4544    null