Subversion Repositories DevTools

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5282 dpurdie 1
#!/usr/bin/env perl
2
# cloc -- Count Lines of Code                  {{{1
3
# Copyright (C) 2006-2015 Al Danial <al.danial@gmail.com>
4
# First release August 2006
5
#
6
# Includes code from:
7
#   - SLOCCount v2.26 
8
#     http://www.dwheeler.com/sloccount/
9
#     by David Wheeler.
10
#   - Regexp::Common v2.120
11
#     http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm
12
#     by Damian Conway and Abigail.
13
#   - Win32::Autoglob 
14
#     http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
15
#     by Sean M. Burke.
16
#   - Algorithm::Diff
17
#     http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
18
#     by Tye McQueen.
19
#
20
# This program is free software; you can redistribute it and/or modify
21
# it under the terms of the GNU General Public License as published by
22
# the Free Software Foundation; either version 2 of the License, or
23
# (at your option) any later version.
24
#
25
# This program is distributed in the hope that it will be useful,
26
# but WITHOUT ANY WARRANTY; without even the implied warranty of
27
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28
# GNU General Public License for more details:
29
# <http://www.gnu.org/licenses/gpl.txt>.
30
#
31
# 1}}}
32
my $VERSION = "1.65";  # odd number == beta; even number == stable
33
my $URL     = "https://github.com/AlDanial/cloc";
34
require 5.006;
35
# use modules                                  {{{1
36
use warnings;
37
use strict;
38
use Getopt::Long;
39
use File::Basename;
40
use File::Temp qw { tempfile tempdir };
41
use File::Find;
42
use File::Path;
43
use File::Spec;
44
use IO::File;
45
use POSIX "strftime";
46
 
47
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
48
my $HAVE_Digest_MD5 = 0;
49
eval "use Digest::MD5;";
50
if (defined $Digest::MD5::VERSION) {
51
    $HAVE_Digest_MD5 = 1;
52
} else {
53
    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
54
}
55
 
56
# Time::HiRes became standard with Perl 5.8
57
my $HAVE_Time_HiRes = 0;
58
eval "use Time::HiRes;";
59
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
60
 
61
my $HAVE_Rexexp_Common;
62
# Regexp::Common isn't in the standard distribution.  It will
63
# be installed in a temp directory if necessary.
64
BEGIN {
65
    if (eval "use Regexp::Common;") {
66
        $HAVE_Rexexp_Common = 1;
67
    } else {
68
        $HAVE_Rexexp_Common = 0;
69
    }
70
}
71
 
72
my $HAVE_Algorith_Diff = 0;
73
# Algorithm::Diff isn't in the standard distribution.  It will
74
# be installed in a temp directory if necessary.
75
eval "use Algorithm::Diff qw ( sdiff ) ";
76
if (defined $Algorithm::Diff::VERSION) {
77
    $HAVE_Algorith_Diff = 1;
78
} else {
79
    Install_Algorithm_Diff();
80
}
81
# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
82
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
83
 
84
# Uncomment next two lines when building Windows executable with perl2exe
85
# or if running on a system that already has Regexp::Common.
86
#use Regexp::Common;
87
#$HAVE_Rexexp_Common = 1;
88
 
89
#perl2exe_include "Regexp/Common/whitespace.pm"
90
#perl2exe_include "Regexp/Common/URI.pm"
91
#perl2exe_include "Regexp/Common/URI/fax.pm"
92
#perl2exe_include "Regexp/Common/URI/file.pm"
93
#perl2exe_include "Regexp/Common/URI/ftp.pm"
94
#perl2exe_include "Regexp/Common/URI/gopher.pm"
95
#perl2exe_include "Regexp/Common/URI/http.pm"
96
#perl2exe_include "Regexp/Common/URI/pop.pm"
97
#perl2exe_include "Regexp/Common/URI/prospero.pm"
98
#perl2exe_include "Regexp/Common/URI/news.pm"
99
#perl2exe_include "Regexp/Common/URI/tel.pm"
100
#perl2exe_include "Regexp/Common/URI/telnet.pm"
101
#perl2exe_include "Regexp/Common/URI/tv.pm"
102
#perl2exe_include "Regexp/Common/URI/wais.pm"
103
#perl2exe_include "Regexp/Common/CC.pm"
104
#perl2exe_include "Regexp/Common/SEN.pm"
105
#perl2exe_include "Regexp/Common/number.pm"
106
#perl2exe_include "Regexp/Common/delimited.pm"
107
#perl2exe_include "Regexp/Common/profanity.pm"
108
#perl2exe_include "Regexp/Common/net.pm"
109
#perl2exe_include "Regexp/Common/zip.pm"
110
#perl2exe_include "Regexp/Common/comment.pm"
111
#perl2exe_include "Regexp/Common/balanced.pm"
112
#perl2exe_include "Regexp/Common/lingua.pm"
113
#perl2exe_include "Regexp/Common/list.pm"
114
#perl2exe_include "File/Glob.pm"
115
 
116
use Text::Tabs qw { expand };
117
use Cwd qw { cwd };
118
use File::Glob;
119
# 1}}}
120
# Usage information, options processing.       {{{1
121
my $ON_WINDOWS = 0;
122
   $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
123
if ($ON_WINDOWS and $ENV{'SHELL'}) {
124
    if ($ENV{'SHELL'} =~ m{^/}) {
125
        $ON_WINDOWS = 0;  # make Cygwin look like Unix
126
    } else {
127
        $ON_WINDOWS = 1;  # MKS defines $SHELL but still acts like Windows
128
    }
129
}
130
 
131
my $NN     = chr(27) . "[0m";  # normal
132
   $NN     = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR:  is it a terminal?
133
my $BB     = chr(27) . "[1m";  # bold
134
   $BB     = "" if $ON_WINDOWS or !(-t STDERR);
135
my $script = basename $0;
136
my $usage  = "
137
Usage: $script [options] <file(s)/dir(s)> | <set 1> <set 2> | <report files>
138
 
139
 Count, or compute differences of, physical lines of source code in the
140
 given files (may be archives such as compressed tarballs or zip files)
141
 and/or recursively below the given directories.
142
 
143
 ${BB}Input Options${NN}
144
   --extract-with=<cmd>      This option is only needed if cloc is unable
145
                             to figure out how to extract the contents of
146
                             the input file(s) by itself.
147
                             Use <cmd> to extract binary archive files (e.g.:
148
                             .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
149
                             a stand-in for the actual file(s) to be
150
                             extracted.  For example, to count lines of code
151
                             in the input files
152
                                gcc-4.2.tar.gz  perl-5.8.8.tar.gz
153
                             on Unix use
154
                               --extract-with='gzip -dc >FILE< | tar xf -'
155
                             or, if you have GNU tar,
156
                               --extract-with='tar zxf >FILE<'
157
                             and on Windows use, for example:
158
                               --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
159
                             (if WinZip is installed there).
160
   --list-file=<file>        Take the list of file and/or directory names to
161
                             process from <file>, which has one file/directory
162
                             name per line.  Only exact matches are counted;
163
                             relative path names will be resolved starting from 
164
                             the directory where cloc is invoked.  
165
                             See also --exclude-list-file.
166
   --unicode                 Check binary files to see if they contain Unicode
167
                             expanded ASCII text.  This causes performance to
168
                             drop noticably.
169
 
170
 ${BB}Processing Options${NN}
171
   --autoconf                Count .in files (as processed by GNU autoconf) of
172
                             recognized languages.
173
   --by-file                 Report results for every source file encountered.
174
   --by-file-by-lang         Report results for every source file encountered
175
                             in addition to reporting by language.
176
   --count-and-diff <set1> <set2>    
177
                             First perform direct code counts of source file(s)
178
                             of <set1> and <set2> separately, then perform a diff 
179
                             of these.  Inputs may be pairs of files, directories, 
180
                             or archives.  See also --diff, --diff-alignment,
181
                             --diff-timeout, --ignore-case, --ignore-whitespace.
182
   --diff <set1> <set2>      Compute differences in code and comments between
183
                             source file(s) of <set1> and <set2>.  The inputs
184
                             may be pairs of files, directories, or archives.
185
                             Use --diff-alignment to generate a list showing
186
                             which file pairs where compared.  See also
187
                             --count-and-diff, --diff-alignment, --diff-timeout, 
188
                             --ignore-case, --ignore-whitespace.
189
   --diff-timeout <N>        Ignore files which take more than <N> seconds
190
                             to process.  Default is 10 seconds.
191
                             (Large files with many repeated lines can cause 
192
                             Algorithm::Diff::sdiff() to take hours.)
193
   --follow-links            [Unix only] Follow symbolic links to directories
194
                             (sym links to files are always followed).
195
   --force-lang=<lang>[,<ext>]
196
                             Process all files that have a <ext> extension
197
                             with the counter for language <lang>.  For
198
                             example, to count all .f files with the
199
                             Fortran 90 counter (which expects files to
200
                             end with .f90) instead of the default Fortran 77
201
                             counter, use
202
                               --force-lang=\"Fortran 90\",f
203
                             If <ext> is omitted, every file will be counted
204
                             with the <lang> counter.  This option can be
205
                             specified multiple times (but that is only
206
                             useful when <ext> is given each time).
207
                             See also --script-lang, --lang-no-ext.
208
   --force-lang-def=<file>   Load language processing filters from <file>,
209
                             then use these filters instead of the built-in
210
                             filters.  Note:  languages which map to the same 
211
                             file extension (for example:
212
                             MATLAB/Objective C/MUMPS/Mercury;  Pascal/PHP; 
213
                             Lisp/OpenCL; Lisp/Julia; Perl/Prolog) will be 
214
                             ignored as these require additional processing 
215
                             that is not expressed in language definition 
216
                             files.  Use --read-lang-def to define new 
217
                             language filters without replacing built-in 
218
                             filters (see also --write-lang-def).
219
   --ignore-whitespace       Ignore horizontal white space when comparing files
220
                             with --diff.  See also --ignore-case.
221
   --ignore-case             Ignore changes in case; consider upper- and lower-
222
                             case letters equivalent when comparing files with
223
                             --diff.  See also --ignore-whitespace.
224
   --lang-no-ext=<lang>      Count files without extensions using the <lang>
225
                             counter.  This option overrides internal logic
226
                             for files without extensions (where such files
227
                             are checked against known scripting languages
228
                             by examining the first line for #!).  See also
229
                             --force-lang, --script-lang.
230
   --max-file-size=<MB>      Skip files larger than <MB> megabytes when
231
                             traversing directories.  By default, <MB>=100.
232
                             cloc's memory requirement is roughly twenty times 
233
                             larger than the largest file so running with 
234
                             files larger than 100 MB on a computer with less 
235
                             than 2 GB of memory will cause problems.  
236
                             Note:  this check does not apply to files 
237
                             explicitly passed as command line arguments.
238
   --read-binary-files       Process binary files in addition to text files.
239
                             This is usually a bad idea and should only be
240
                             attempted with text files that have embedded
241
                             binary data.
242
   --read-lang-def=<file>    Load new language processing filters from <file>
243
                             and merge them with those already known to cloc.  
244
                             If <file> defines a language cloc already knows 
245
                             about, cloc's definition will take precedence.  
246
                             Use --force-lang-def to over-ride cloc's 
247
                             definitions (see also --write-lang-def ).
248
   --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
249
                             scripting language with the counter for language
250
                             <lang>.  For example, files that begin with
251
                                #!/usr/local/bin/perl5.8.8
252
                             will be counted with the Perl counter by using
253
                                --script-lang=Perl,perl5.8.8
254
                             The language name is case insensitive but the
255
                             name of the script language executable, <s>,
256
                             must have the right case.  This option can be
257
                             specified multiple times.  See also --force-lang,
258
                             --lang-no-ext.
259
   --sdir=<dir>              Use <dir> as the scratch directory instead of
260
                             letting File::Temp chose the location.  Files
261
                             written to this location are not removed at
262
                             the end of the run (as they are with File::Temp).
263
   --skip-uniqueness         Skip the file uniqueness check.  This will give
264
                             a performance boost at the expense of counting
265
                             files with identical contents multiple times
266
                             (if such duplicates exist).
267
   --stdin-name=<file>       Give a file name to use to determine the language
268
                             for standard input.
269
   --strip-comments=<ext>    For each file processed, write to the current
270
                             directory a version of the file which has blank
271
                             lines and comments removed.  The name of each
272
                             stripped file is the original file name with
273
                             .<ext> appended to it.  It is written to the
274
                             current directory unless --original-dir is on.
275
   --original-dir            [Only effective in combination with
276
                             --strip-comments]  Write the stripped files
277
                             to the same directory as the original files.
278
   --sum-reports             Input arguments are report files previously
279
                             created with the --report-file option.  Makes
280
                             a cumulative set of results containing the
281
                             sum of data from the individual report files.
282
   --unix                    Override the operating system autodetection
283
                             logic and run in UNIX mode.  See also
284
                             --windows, --show-os.
285
   --windows                 Override the operating system autodetection
286
                             logic and run in Microsoft Windows mode.
287
                             See also --unix, --show-os.
288
 
289
 ${BB}Filter Options${NN}
290
   --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
291
                             D1, D2, D3, et cetera, from being scanned.  For
292
                             example  --exclude-dir=.cache,test  will skip
293
                             all files that have /.cache/ or /test/ as part
294
                             of their path.
295
                             Directories named .bzr, .cvs, .hg, .git, and
296
                             .svn are always excluded.
297
   --exclude-ext=<ext1>[,<ext2>[...]]
298
                             Do not count files having the given file name
299
                             extensions.
300
   --exclude-lang=<L1>[,L2,] Exclude the given comma separated languages
301
                             L1, L2, L3, et cetera, from being counted.
302
   --exclude-list-file=<file>  Ignore files and/or directories whose names
303
                             appear in <file>.  <file> should have one file
304
                             name per line.  Only exact matches are ignored;
305
                             relative path names will be resolved starting from 
306
                             the directory where cloc is invoked.  
307
                             See also --list-file.
308
   --include-lang=<L1>[,L2,] Count only the given comma separated languages
309
                             L1, L2, L3, et cetera.
310
   --match-d=<regex>         Only count files in directories matching the Perl
311
                             regex.  For example
312
                               --match-d='/(src|include)/'
313
                             only counts files in directories containing
314
                             /src/ or /include/.
315
   --not-match-d=<regex>     Count all files except those in directories
316
                             matching the Perl regex.
317
   --match-f=<regex>         Only count files whose basenames match the Perl
318
                             regex.  For example
319
                               --match-f='^[Ww]idget'
320
                             only counts files that start with Widget or widget.
321
   --not-match-f=<regex>     Count all files except those whose basenames
322
                             match the Perl regex.
323
   --skip-archive=<regex>    Ignore files that end with the given Perl regular
324
                             expression.  For example, if given
325
                               --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
326
                             the code will skip files that end with .zip,
327
                             .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
328
                             .tar.7z.
329
   --skip-win-hidden         On Windows, ignore hidden files.
330
 
331
 ${BB}Debug Options${NN}
332
   --categorized=<file>      Save names of categorized files to <file>.
333
   --counted=<file>          Save names of processed source files to <file>.
334
   --explain=<lang>          Print the filters used to remove comments for
335
                             language <lang> and exit.  In some cases the 
336
                             filters refer to Perl subroutines rather than
337
                             regular expressions.  An examination of the
338
                             source code may be needed for further explanation.
339
   --diff-alignment=<file>   Write to <file> a list of files and file pairs
340
                             showing which files were added, removed, and/or
341
                             compared during a run with --diff.  This switch
342
                             forces the --diff mode on.
343
   --help                    Print this usage information and exit.
344
   --found=<file>            Save names of every file found to <file>.
345
   --ignored=<file>          Save names of ignored files and the reason they
346
                             were ignored to <file>.
347
   --print-filter-stages     Print processed source code before and after 
348
                             each filter is applied.
349
   --show-ext[=<ext>]        Print information about all known (or just the
350
                             given) file extensions and exit.
351
   --show-lang[=<lang>]      Print information about all known (or just the
352
                             given) languages and exit.
353
   --show-os                 Print the value of the operating system mode
354
                             and exit.  See also --unix, --windows.
355
   -v[=<n>]                  Verbose switch (optional numeric value).
356
   --version                 Print the version of this program and exit.
357
   --write-lang-def=<file>   Writes to <file> the language processing filters
358
                             then exits.  Useful as a first step to creating
359
                             custom language definitions (see also
360
                             --force-lang-def, --read-lang-def).
361
 
362
 ${BB}Output Options${NN}
363
   --3                       Print third-generation language output.
364
                             (This option can cause report summation to fail
365
                             if some reports were produced with this option
366
                             while others were produced without it.)
367
   --by-percent  X           Instead of comment and blank line counts, show 
368
                             these values as percentages based on the value 
369
                             of X in the denominator:
370
                                X = 'c'   -> # lines of code
371
                                X = 'cm'  -> # lines of code + comments
372
                                X = 'cb'  -> # lines of code + blanks
373
                                X = 'cmb' -> # lines of code + comments + blanks
374
                             For example, if using method 'c' and your code
375
                             has twice as many lines of comments as lines 
376
                             of code, the value in the comment column will 
377
                             be 200%.  The code column remains a line count.
378
   --csv                     Write the results as comma separated values.
379
   --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
380
                             separated files instead of ,.  This switch forces
381
   --out=<file>              Synonym for --report-file=<file>.
382
                             --csv to be on.
383
   --progress-rate=<n>       Show progress update after every <n> files are
384
                             processed (default <n>=100).  Set <n> to 0 to
385
                             suppress progress output (useful when redirecting
386
                             output to STDOUT).
387
   --quiet                   Suppress all information messages except for
388
                             the final report.
389
   --report-file=<file>      Write the results to <file> instead of STDOUT.
390
   --sql=<file>              Write results as SQL create and insert statements
391
                             which can be read by a database program such as
392
                             SQLite.  If <file> is -, output is sent to STDOUT.
393
   --sql-append              Append SQL insert statements to the file specified
394
                             by --sql and do not generate table creation
395
                             statements.  Only valid with the --sql option.
396
   --sql-project=<name>      Use <name> as the project identifier for the
397
                             current run.  Only valid with the --sql option.
398
   --sql-style=<style>       Write SQL statements in the given style instead
399
                             of the default SQLite format.  Currently, the 
400
                             only style option is Oracle.
401
   --sum-one                 For plain text reports, show the SUM: output line
402
                             even if only one input file is processed.
403
   --xml                     Write the results in XML.
404
   --xsl=<file>              Reference <file> as an XSL stylesheet within
405
                             the XML output.  If <file> is 1 (numeric one),
406
                             writes a default stylesheet, cloc.xsl (or
407
                             cloc-diff.xsl if --diff is also given).
408
                             This switch forces --xml on.
409
   --yaml                    Write the results in YAML.
410
 
411
";
412
#  Help information for options not yet implemented:
413
#  --inline                  Process comments that appear at the end
414
#                            of lines containing code.
415
#  --html                    Create HTML files of each input file showing
416
#                            comment and code lines in different colors.
417
 
418
$| = 1;  # flush STDOUT
419
my $start_time = get_time();
420
my (
421
    $opt_categorized          ,
422
    $opt_found                ,
423
    @opt_force_lang           ,
424
    $opt_lang_no_ext          ,
425
    @opt_script_lang          ,
426
    $opt_count_diff           ,
427
    $opt_diff                 ,
428
    $opt_diff_alignment       ,
429
    $opt_diff_timeout         ,
430
    $opt_html                 ,
431
    $opt_ignored              ,
432
    $opt_counted              ,
433
    $opt_show_ext             ,
434
    $opt_show_lang            ,
435
    $opt_progress_rate        ,
436
    $opt_print_filter_stages  ,
437
    $opt_v                    ,
438
    $opt_version              ,
439
    $opt_exclude_lang         ,
440
    $opt_exclude_list_file    ,
441
    $opt_exclude_dir          ,
442
    $opt_explain              ,
443
    $opt_include_lang         ,
444
    $opt_force_lang_def       ,
445
    $opt_read_lang_def        ,
446
    $opt_write_lang_def       ,
447
    $opt_strip_comments       ,
448
    $opt_original_dir         ,
449
    $opt_quiet                ,
450
    $opt_report_file          ,
451
    $opt_sdir                 ,
452
    $opt_sum_reports          ,
453
    $opt_unicode              ,
454
    $opt_no3                  ,   # accept it but don't use it
455
    $opt_3                    ,
456
    $opt_extract_with         ,
457
    $opt_by_file              ,
458
    $opt_by_file_by_lang      ,
459
    $opt_by_percent           ,
460
    $opt_xml                  ,
461
    $opt_xsl                  ,
462
    $opt_yaml                 ,
463
    $opt_csv                  ,
464
    $opt_csv_delimiter        ,
465
    $opt_match_f              ,
466
    $opt_not_match_f          ,
467
    $opt_match_d              ,
468
    $opt_not_match_d          ,
469
    $opt_skip_uniqueness      ,
470
    $opt_list_file            ,
471
    $opt_help                 ,
472
    $opt_skip_win_hidden      ,
473
    $opt_read_binary_files    ,
474
    $opt_sql                  ,
475
    $opt_sql_append           ,
476
    $opt_sql_project          ,
477
    $opt_sql_style            ,
478
    $opt_inline               ,
479
    $opt_exclude_ext          ,
480
    $opt_ignore_whitespace    ,
481
    $opt_ignore_case          ,
482
    $opt_follow_links         ,
483
    $opt_autoconf             ,
484
    $opt_sum_one              ,
485
    $opt_stdin_name           ,
486
    $opt_force_on_windows     ,
487
    $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
488
    $opt_show_os              ,
489
    $opt_skip_archive         ,
490
    $opt_max_file_size        ,   # in MB
491
   );
492
my $getopt_success = GetOptions(
493
   "by_file|by-file"                         => \$opt_by_file             ,
494
   "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
495
   "categorized=s"                           => \$opt_categorized         ,
496
   "counted=s"                               => \$opt_counted             ,
497
   "include_lang|include-lang=s"             => \$opt_include_lang        ,
498
   "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
499
   "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
500
   "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
501
   "explain=s"                               => \$opt_explain             ,
502
   "extract_with|extract-with=s"             => \$opt_extract_with        ,
503
   "found=s"                                 => \$opt_found               ,
504
   "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
505
   "diff"                                    => \$opt_diff                ,
506
   "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
507
   "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
508
   "html"                                    => \$opt_html                ,
509
   "ignored=s"                               => \$opt_ignored             ,
510
   "quiet"                                   => \$opt_quiet               ,
511
   "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
512
   "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
513
   "show_ext|show-ext:s"                     => \$opt_show_ext            ,
514
   "show_lang|show-lang:s"                   => \$opt_show_lang           ,
515
   "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
516
   "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
517
   "report_file|report-file=s"               => \$opt_report_file         ,
518
   "out=s"                                   => \$opt_report_file         ,
519
   "script_lang|script-lang=s"               => \@opt_script_lang         ,
520
   "sdir=s"                                  => \$opt_sdir                ,
521
   "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
522
   "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
523
   "original_dir|original-dir"               => \$opt_original_dir        ,
524
   "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
525
   "unicode"                                 => \$opt_unicode             ,
526
   "no3"                                     => \$opt_no3                 ,  # ignored
527
   "3"                                       => \$opt_3                   ,
528
   "v:i"                                     => \$opt_v                   ,
529
   "version"                                 => \$opt_version             ,
530
   "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
531
   "xml"                                     => \$opt_xml                 ,
532
   "xsl=s"                                   => \$opt_xsl                 ,
533
   "force_lang|force-lang=s"                 => \@opt_force_lang          ,
534
   "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
535
   "yaml"                                    => \$opt_yaml                ,
536
   "csv"                                     => \$opt_csv                 ,
537
   "csv_delimeter|csv-delimiter=s"           => \$opt_csv_delimiter       ,
538
   "match_f|match-f=s"                       => \$opt_match_f             ,
539
   "not_match_f|not-match-f=s"               => \$opt_not_match_f         ,
540
   "match_d|match-d=s"                       => \$opt_match_d             ,
541
   "not_match_d|not-match-d=s"               => \$opt_not_match_d         ,
542
   "list_file|list-file=s"                   => \$opt_list_file           ,
543
   "help"                                    => \$opt_help                ,
544
   "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
545
   "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
546
   "sql=s"                                   => \$opt_sql                 ,
547
   "sql_project|sql-project=s"               => \$opt_sql_project         ,
548
   "sql_append|sql-append"                   => \$opt_sql_append          ,
549
   "sql_style|sql-style=s"                   => \$opt_sql_style           ,
550
   "inline"                                  => \$opt_inline              ,
551
   "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
552
   "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
553
   "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
554
   "follow_links|follow-links"               => \$opt_follow_links        ,
555
   "autoconf"                                => \$opt_autoconf            ,
556
   "sum_one|sum-one"                         => \$opt_sum_one             ,
557
   "by_percent|by-percent=s"                 => \$opt_by_percent          ,
558
   "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
559
   "windows"                                 => \$opt_force_on_windows    ,
560
   "unix"                                    => \$opt_force_on_unix       ,
561
   "show_os|show-os"                         => \$opt_show_os             ,
562
   "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
563
   "max_file_size|max-file-size=i"           => \$opt_max_file_size       ,
564
  );
565
$opt_by_file  = 1 if defined  $opt_by_file_by_lang;
566
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
567
   $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
568
die "\n" unless $getopt_success;
569
die $usage if $opt_help;
570
my %Exclude_Language = ();
571
   %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang) 
572
        if $opt_exclude_lang;
573
my %Exclude_Dir      = ();
574
   %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir ) 
575
        if $opt_exclude_dir ;
576
my %Include_Language = ();
577
   %Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang) 
578
        if $opt_include_lang;
579
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of these
580
# directories often conflict with files of interest.
581
$opt_exclude_dir       = 1;
582
$Exclude_Dir{".svn"}   = 1;
583
$Exclude_Dir{".cvs"}   = 1;
584
$Exclude_Dir{".hg"}    = 1;
585
$Exclude_Dir{".git"}   = 1;
586
$Exclude_Dir{".bzr"}   = 1;
587
$opt_count_diff        = defined $opt_count_diff ? 1 : 0;
588
$opt_diff              = 1  if $opt_diff_alignment;
589
$opt_exclude_ext       = "" unless $opt_exclude_ext;
590
$opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
591
$opt_ignore_case       = 0  unless $opt_ignore_case;
592
$opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
593
$opt_follow_links      = 0  unless $opt_follow_links;
594
$opt_diff_timeout      =10  unless $opt_diff_timeout;
595
$opt_csv               = 1  if $opt_csv_delimiter;
596
$ON_WINDOWS            = 1  if $opt_force_on_windows;
597
$ON_WINDOWS            = 0  if $opt_force_on_unix;
598
$opt_max_file_size     = 100 unless $opt_max_file_size;
599
 
600
my @COUNT_DIFF_ARGV        = undef;
601
my $COUNT_DIFF_report_file = undef;
602
if ($opt_count_diff) {
603
    die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n" 
604
        if scalar @ARGV != 2;
605
    # prefix with a dummy term so that $opt_count_diff is the
606
    # index into @COUNT_DIFF_ARGV to work on at each pass
607
    @COUNT_DIFF_ARGV = (undef, $ARGV[0],
608
                               $ARGV[1],
609
                              [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
610
    $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
611
}
612
 
613
# Options defaults:
614
$opt_progress_rate = 100 unless defined $opt_progress_rate;
615
$opt_progress_rate =   0 if     defined $opt_quiet;
616
if (!defined $opt_v) {
617
    $opt_v  = 0;
618
} elsif (!$opt_v) {
619
    $opt_v  = 1;
620
}
621
if (defined $opt_xsl) {
622
    $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
623
    $opt_xml = 1;
624
}
625
my $skip_generate_report = 0;
626
$opt_sql_style = 0 unless defined $opt_sql_style;
627
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
628
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
629
    $opt_quiet            = 1;
630
    $skip_generate_report = 1;
631
    $opt_by_file          = 1;
632
    $opt_sum_reports      = 0;
633
    $opt_progress_rate    = 0;
634
} elsif ($opt_sql)  { # write SQL output to a file
635
    $opt_by_file          = 1;
636
    $skip_generate_report = 1;
637
    $opt_sum_reports      = 0;
638
}
639
if ($opt_sql_style) {
640
    $opt_sql_style = lc $opt_sql_style;
641
    if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle )) {
642
        die "'$opt_sql_style' is not a recognized SQL style.\n";
643
    }
644
}
645
$opt_by_percent = '' unless defined $opt_by_percent;
646
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
647
    die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
648
}
649
$opt_by_percent = lc $opt_by_percent;
650
 
651
die $usage unless defined $opt_version         or
652
                  defined $opt_show_lang       or
653
                  defined $opt_show_ext        or
654
                  defined $opt_show_os         or
655
                  defined $opt_write_lang_def  or
656
                  defined $opt_list_file       or
657
                  defined $opt_xsl             or
658
                  defined $opt_explain         or
659
                  scalar @ARGV >= 1;
660
die "--diff requires two arguments; got ", scalar @ARGV, "\n" 
661
    if $opt_diff and scalar @ARGV != 2;
662
if ($opt_version) {
663
    printf "$VERSION\n";
664
    exit;
665
}
666
# 1}}}
667
# Step 1:  Initialize global constants.        {{{1
668
#
669
my $nFiles_Found = 0;  # updated in make_file_list
670
my (%Language_by_Extension, %Language_by_Script,
671
    %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
672
    %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
673
    %EOL_Continuation_re,
674
   );
675
my $ALREADY_SHOWED_HEADER = 0;
676
my $ALREADY_SHOWED_XML_SECTION = 0;
677
my %Error_Codes = ( 'Unable to read'                => -1,
678
                    'Neither file nor directory'    => -2, 
679
                    'Diff error (quoted comments?)' => -3, 
680
                    'Diff error, exceeded timeout'  => -4, 
681
                  );
682
if ($opt_force_lang_def) {
683
    # replace cloc's definitions
684
    read_lang_def(
685
        $opt_force_lang_def    , #        Sample values:
686
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
687
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
688
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
689
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
690
                                 #      [ 'remove_matches' , '^\s*#'  ]
691
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
692
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
693
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
694
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
695
        );
696
} else {
697
    set_constants(               #
698
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
699
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
700
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
701
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
702
                                 #      [ 'remove_matches' , '^\s*#'  ]
703
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
704
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
705
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
706
        \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
707
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
708
        );
709
}
710
if ($opt_read_lang_def) {
711
    # augment cloc's definitions (keep cloc's where there are overlaps)
712
    merge_lang_def(
713
        $opt_read_lang_def     , #        Sample values:
714
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
715
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
716
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
717
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
718
                                 #      [ 'remove_matches' , '^\s*#'  ]
719
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
720
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
721
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
722
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
723
        );
724
}
725
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
726
    die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
727
}
728
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
729
                      \%Scale_Factor);
730
 
731
# Process command line provided extension-to-language mapping overrides.
732
# Make a hash of known languages in lower case for easier matching.
733
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
734
foreach my $language (keys %Filters_by_Language) {
735
    my $lang_lc = lc $language;
736
    $Recognized_Language_lc{$lang_lc} = $language;
737
}
738
my %Forced_Extension = (); # file name extensions which user wants to count
739
my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
740
foreach my $pair (@opt_force_lang) {
741
    my ($lang, $extension) = split(',', $pair);
742
    my $lang_lc = lc $lang;
743
    if (defined $extension) {
744
        $Forced_Extension{$extension} = $lang;
745
 
746
        die_unknown_lang($lang, "--force-lang")
747
            unless $Recognized_Language_lc{$lang_lc}; 
748
 
749
        $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
750
    } else {
751
        # the scary case--count everything as this language
752
        $All_One_Language = $Recognized_Language_lc{$lang_lc};
753
    }
754
}
755
 
756
foreach my $pair (@opt_script_lang) {
757
    my ($lang, $script_name) = split(',', $pair);
758
    my $lang_lc = lc $lang;
759
    if (!defined $script_name) {
760
        die "The --script-lang option requires a comma separated pair of ".
761
            "strings.\n";
762
    }
763
 
764
    die_unknown_lang($lang, "--script-lang")
765
        unless $Recognized_Language_lc{$lang_lc}; 
766
 
767
    $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
768
}
769
 
770
# If user provided file extensions to ignore, add these to
771
# the exclusion list.
772
foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
773
    $ext = lc $ext if $ON_WINDOWS;
774
    $Not_Code_Extension{$ext} = 1;
775
}
776
 
777
# If SQL or --by-file output is requested, keep track of directory names
778
# generated by File::Temp::tempdir and used to temporarily hold the results
779
# of compressed archives.  Contents of the SQL table 't' will be much
780
# cleaner if these meaningless directory names are stripped from the front
781
# of files pulled from the archives.
782
my %TEMP_DIR = ();
783
my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
784
                    # scratch directories made in this run to avoid
785
                    # file overwrites by multiple extractions to same
786
                    # sdir.
787
# Also track locations where temporary installations, if necessary, of
788
# Algorithm::Diff and/or Regexp::Common are done.  Make sure these 
789
# directories are not counted as inputs (ref bug #80 2012-11-23).
790
my %TEMP_INST = ();
791
 
792
# invert %Language_by_Script hash to get an easy-to-look-up list of known
793
# scripting languages
794
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
795
# 1}}}
796
# Step 2:  Early exits for display, summation. {{{1
797
#
798
print_extension_info(   $opt_show_ext     ) if defined $opt_show_ext ;
799
print_language_info(    $opt_show_lang, '') if defined $opt_show_lang;
800
print_language_filters( $opt_explain      ) if defined $opt_explain  ;
801
exit if (defined $opt_show_ext)  or 
802
        (defined $opt_show_lang) or
803
        (defined $opt_explain);
804
 
805
Top_of_Processing_Loop:
806
# Sorry, coding purists.  Using a goto to implement --count-and-diff
807
# which has to do three passes over the main code, starting with
808
# a clean slate each time.
809
if ($opt_count_diff) {
810
    @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
811
    if ($opt_count_diff == 3) {
812
        $opt_diff = 1;
813
        @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
814
    }
815
    if ($opt_report_file) { 
816
        # Instead of just one output file, will have three.
817
        # Keep their names unique otherwise results are clobbered.
818
        if ($opt_count_diff == 3) {
819
            $opt_report_file = $COUNT_DIFF_report_file . ".diff.$ARGV[0].$ARGV[1]";
820
        } else {
821
            $opt_report_file = $COUNT_DIFF_report_file . "." .  $ARGV[0];
822
        }
823
    } else {
824
        # STDOUT; print a header showing what it's working on
825
        if ($opt_count_diff == 3) {
826
            print "\ndiff $ARGV[0] $ARGV[1]::\n";
827
        } else {
828
            print "\n" if $opt_count_diff > 1;
829
            print "$ARGV[0]::\n";
830
        }
831
    }
832
    $ALREADY_SHOWED_HEADER      = 0;
833
    $ALREADY_SHOWED_XML_SECTION = 0;
834
}
835
 
836
#print "Before glob have [", join(",", @ARGV), "]\n";
837
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
838
#print "after  glob have [", join(",", @ARGV), "]\n";
839
 
840
# filter out archive files if requested to do so
841
if (defined $opt_skip_archive) {
842
    my @non_archive = ();
843
    foreach my $candidate (@ARGV) {
844
        if ($candidate !~ m/${opt_skip_archive}$/) {
845
            push @non_archive, $candidate;
846
 
847
        } 
848
    }
849
    @ARGV = @non_archive;
850
}
851
 
852
if ($opt_sum_reports and $opt_diff) {
853
    my @results = ();
854
    if ($opt_list_file) { # read inputs from the list file
855
        my @list = read_list_file($opt_list_file);
856
        @results = combine_diffs(\@list);
857
    } else { # get inputs from the command line
858
        @results = combine_diffs(\@ARGV);
859
    }
860
    if ($opt_report_file) {
861
        write_file($opt_report_file, @results);
862
    } else {
863
        print "\n", join("\n", @results), "\n";
864
    }
865
    exit;
866
}
867
if ($opt_sum_reports) {
868
    my %Results = ();
869
    foreach my $type( "by language", "by report file" ) {
870
        my $found_lang = undef; 
871
        if ($opt_list_file) { # read inputs from the list file
872
            my @list = read_list_file($opt_list_file);
873
            $found_lang = combine_results(\@list, 
874
                                           $type, 
875
                                          \%{$Results{ $type }}, 
876
                                          \%Filters_by_Language );
877
        } else { # get inputs from the command line
878
            $found_lang = combine_results(\@ARGV, 
879
                                           $type, 
880
                                          \%{$Results{ $type }}, 
881
                                          \%Filters_by_Language );
882
        }
883
        next unless %Results;
884
        my $end_time = get_time();
885
        my @results  = generate_report($VERSION, $end_time - $start_time,
886
                                       $type,
887
                                      \%{$Results{ $type }}, \%Scale_Factor);
888
        if ($opt_report_file) {
889
            my $ext  = ".lang";
890
               $ext  = ".file" unless $type eq "by language";
891
            next if !$found_lang and  $ext  eq ".lang";
892
            write_file($opt_report_file . $ext, @results);
893
        } else {
894
            print "\n", join("\n", @results), "\n";
895
        }
896
    }
897
    exit;
898
}
899
if ($opt_write_lang_def) {
900
    write_lang_def($opt_write_lang_def   ,
901
                  \%Language_by_Extension,
902
                  \%Language_by_Script   ,
903
                  \%Language_by_File     ,
904
                  \%Filters_by_Language  ,
905
                  \%Not_Code_Extension   ,
906
                  \%Not_Code_Filename    ,
907
                  \%Scale_Factor         ,
908
                  \%EOL_Continuation_re  ,
909
                  );
910
    exit;
911
}
912
if ($opt_show_os) {
913
    if ($ON_WINDOWS) {
914
        print "Windows\n";
915
    } else {
916
        print "UNIX\n";
917
    }
918
    exit;
919
}
920
# 1}}}
921
# Step 3:  Create a list of files to consider. {{{1
922
#  a) If inputs are binary archives, first cd to a temp
923
#     directory, expand the archive with the user-given
924
#     extraction tool, then add the temp directory to
925
#     the list of dirs to process.
926
#  b) Create a list of every file that might contain source
927
#     code.  Ignore binary files, zero-sized files, and
928
#     any file in a directory the user says to exclude.
929
#  c) Determine the language for each file in the list.
930
#
931
my @binary_archive = ();
932
my $cwd            = cwd();
933
if ($opt_extract_with) {
934
#print "cwd main = [$cwd]\n";
935
    my @extract_location = ();
936
    foreach my $bin_file (@ARGV) {
937
        my $extract_dir = undef;
938
        if ($opt_sdir) {
939
            ++$TEMP_OFF;
940
            $extract_dir = "$opt_sdir/$TEMP_OFF";
941
            File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
942
            File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
943
        } else {
944
            $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
945
        }
946
        $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
947
        print "mkdir $extract_dir\n"  if $opt_v;
948
        print "cd    $extract_dir\n"  if $opt_v;
949
        chdir $extract_dir;
950
        my $bin_file_full_path = "";
951
        if (File::Spec->file_name_is_absolute( $bin_file )) {
952
            $bin_file_full_path = $bin_file;
953
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
954
        } else {
955
            $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
956
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
957
        }
958
        my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
959
        print  $extract_cmd, "\n" if $opt_v;
960
        system $extract_cmd;
961
        push @extract_location, $extract_dir;
962
        chdir $cwd;
963
    }
964
    # It is possible that the binary archive itself contains additional
965
    # files compressed the same way (true for Java .ear files).  Go
966
    # through all the files that were extracted, see if they are binary
967
    # archives and try to extract them.  Lather, rinse, repeat.
968
    my $binary_archives_exist = 1;
969
    my $count_binary_archives = 0;
970
    my $previous_count        = 0;
971
    my $n_pass                = 0;
972
    while ($binary_archives_exist) {
973
        @binary_archive = ();
974
        foreach my $dir (@extract_location) {
975
            find(\&archive_files, $dir);  # populates global @binary_archive
976
        }
977
        foreach my $archive (@binary_archive) {
978
            my $extract_dir = undef;
979
            if ($opt_sdir) {
980
                ++$TEMP_OFF;
981
                $extract_dir = "$opt_sdir/$TEMP_OFF";
982
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
983
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
984
            } else {
985
                $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
986
            }
987
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
988
            print "mkdir $extract_dir\n"  if $opt_v;
989
            print "cd    $extract_dir\n"  if $opt_v;
990
            chdir  $extract_dir;
991
 
992
            my     $extract_cmd = uncompress_archive_cmd($archive);
993
            print  $extract_cmd, "\n" if $opt_v;
994
            system $extract_cmd;
995
            push @extract_location, $extract_dir;
996
            unlink $archive;  # otherwise will be extracting it forever 
997
        }
998
        $count_binary_archives = scalar @binary_archive;
999
        if ($count_binary_archives == $previous_count) {
1000
            $binary_archives_exist = 0;
1001
        }
1002
        $previous_count = $count_binary_archives;
1003
    }
1004
    chdir $cwd;
1005
 
1006
    @ARGV = @extract_location;
1007
} else {
1008
    # see if any of the inputs need to be auto-uncompressed &/or expanded
1009
    my @updated_ARGS = ();
1010
    foreach my $Arg (@ARGV) {
1011
        if (is_dir($Arg)) {
1012
            push @updated_ARGS, $Arg;
1013
            next;
1014
        }
1015
        my $full_path = "";
1016
        if (File::Spec->file_name_is_absolute( $Arg )) {
1017
            $full_path = $Arg;
1018
        } else {
1019
            $full_path = File::Spec->catfile( $cwd, $Arg );
1020
        }
1021
#print "full_path = [$full_path]\n";
1022
        my $extract_cmd = uncompress_archive_cmd($full_path);
1023
        if ($extract_cmd) {
1024
            my $extract_dir = undef;
1025
            if ($opt_sdir) {
1026
                ++$TEMP_OFF;
1027
                $extract_dir = "$opt_sdir/$TEMP_OFF";
1028
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
1029
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
1030
            } else {
1031
                $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
1032
            }
1033
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
1034
            print "mkdir $extract_dir\n"  if $opt_v;
1035
            print "cd    $extract_dir\n"  if $opt_v;
1036
            chdir  $extract_dir;
1037
            print  $extract_cmd, "\n" if $opt_v;
1038
            system $extract_cmd;
1039
            push @updated_ARGS, $extract_dir;
1040
            chdir $cwd;
1041
        } else {
1042
            # this is a conventional, uncompressed, unarchived file
1043
            # or a directory; keep as-is
1044
            push @updated_ARGS, $Arg;
1045
        }
1046
    }
1047
    @ARGV = @updated_ARGS;
1048
 
1049
    # make sure we're not counting any directory containing
1050
    # temporary installations of Regexp::Common, Algorithm::Diff
1051
    foreach my $d (sort keys %TEMP_INST) {
1052
        foreach my $a (@ARGV) {
1053
            next unless is_dir($a);
1054
            if ($opt_v > 2) {
1055
                printf "Comparing %s (location of %s) to input [%s]\n",
1056
                        $d, $TEMP_INST{$d}, $a;
1057
            }
1058
            if ($a eq $d) {
1059
                die "File::Temp::tempdir chose directory ",
1060
                    $d, " to install ", $TEMP_INST{$d}, " but this ",
1061
                    "matches one of your input directories.  Rerun ",
1062
                    "with --sdir and supply a different temporary ",
1063
                    "directory for ", $TEMP_INST{$d}, "\n";
1064
            }
1065
        }
1066
    }
1067
}
1068
# 1}}}
1069
my @Errors    = ();
1070
my @file_list = ();  # global variable updated in files()
1071
my %Ignored   = ();  # files that are not counted (language not recognized or
1072
                     # problems reading the file)
1073
my @Lines_Out = ();
1074
if ($opt_diff) {
1075
# Step 4:  Separate code from non-code files.  {{{1
1076
my @fh            = ();
1077
my @files_for_set = ();
1078
# make file lists for each separate argument
1079
for (my $i = 0; $i < scalar @ARGV; $i++) {
1080
    push @fh, 
1081
         make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored);
1082
    @{$files_for_set[$i]} = @file_list;
1083
    if ($opt_exclude_list_file) {
1084
        # note: process_exclude_list_file() references global @file_list
1085
        process_exclude_list_file($opt_exclude_list_file, 
1086
                                 \%Exclude_Dir,
1087
                                 \%Ignored);
1088
    }
1089
    @file_list = ();
1090
}
1091
# 1}}}
1092
# Step 5:  Remove duplicate files.             {{{1
1093
#
1094
my %Language           = ();
1095
my %unique_source_file = ();
1096
my $n_set = 0;
1097
foreach my $FH (@fh) {  # loop over each pair of file sets
1098
    ++$n_set;
1099
    remove_duplicate_files($FH, 
1100
                               \%{$Language{$FH}}               , 
1101
                               \%{$unique_source_file{$FH}}     , 
1102
                          \%Error_Codes                         , 
1103
                               \@Errors                         , 
1104
                               \%Ignored                        );
1105
    printf "%2d: %8d unique file%s.                          \r", 
1106
        $n_set,
1107
        plural_form(scalar keys %unique_source_file) 
1108
        unless $opt_quiet;
1109
}
1110
# 1}}}
1111
# Step 6:  Count code, comments, blank lines.  {{{1
1112
#
1113
my %Results_by_Language = ();
1114
my %Results_by_File     = ();
1115
my %Delta_by_Language   = ();
1116
my %Delta_by_File       = ();
1117
 
1118
foreach (my $F = 0; $F < scalar @fh - 1; $F++) { 
1119
    # loop over file sets; do diff between set $F to $F+1
1120
 
1121
    my $nCounted = 0;
1122
 
1123
    my @file_pairs    = ();
1124
    my @files_added   = ();
1125
    my @files_removed = ();
1126
 
1127
    align_by_pairs(\%{$unique_source_file{$fh[$F  ]}}    , # in
1128
                   \%{$unique_source_file{$fh[$F+1]}}    , # in
1129
                   \@files_added                         , # out
1130
                   \@files_removed                       , # out
1131
                   \@file_pairs                          , # out
1132
                   );
1133
    my %already_counted = (); # already_counted{ filename } = 1
1134
 
1135
    if (!@file_pairs) {
1136
        # Special case where all files were either added or deleted.
1137
        # In this case, one of these arrays will be empty: 
1138
        #   @files_added, @files_removed
1139
        # so loop over both to cover both cases.
1140
        my $status = @files_added ? 'added' : 'removed';
1141
        my $offset = @files_added ? 1       : 0        ;
1142
        foreach my $file (@files_added, @files_removed) {
1143
            next unless defined $Language{$fh[$F+$offset]}{$file};
1144
            my $Lang = $Language{$fh[$F+$offset]}{$file};
1145
            next if $Lang eq '(unknown)';
1146
            my ($all_line_count,
1147
                $blank_count   ,
1148
                $comment_count ,
1149
               ) = call_counter($file, $Lang, \@Errors);
1150
            $already_counted{$file} = 1;
1151
            my $code_count = $all_line_count-$blank_count-$comment_count;
1152
            if ($opt_by_file) {
1153
                $Delta_by_File{$file}{'code'   }{$status} += $code_count   ;
1154
                $Delta_by_File{$file}{'blank'  }{$status} += $blank_count  ;
1155
                $Delta_by_File{$file}{'comment'}{$status} += $comment_count;
1156
                $Delta_by_File{$file}{'lang'   }{$status}  = $Lang         ;
1157
                $Delta_by_File{$file}{'nFiles' }{$status} += 1             ;
1158
            }
1159
            $Delta_by_Language{$Lang}{'code'   }{$status} += $code_count   ;
1160
            $Delta_by_Language{$Lang}{'blank'  }{$status} += $blank_count  ;
1161
            $Delta_by_Language{$Lang}{'comment'}{$status} += $comment_count;
1162
            $Delta_by_Language{$Lang}{'nFiles' }{$status} += 1             ;
1163
        }
1164
    }
1165
   #use Data::Dumper::Simple;
1166
   #use Data::Dumper;
1167
   #print Dumper(\@files_added, \@files_removed, \@file_pairs);
1168
    my @alignment = (); # only  used if --diff-alignment
1169
#print "after align_by_pairs:\n";
1170
 
1171
#print "added:\n";
1172
    push @alignment, sprintf "Files added: %d\n", scalar @files_added
1173
        if $opt_diff_alignment;
1174
    foreach my $f (@files_added) {
1175
        next if $already_counted{$f};
1176
#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
1177
        # Don't proceed unless the file (both L and R versions)
1178
        # is in a known language.
1179
        next if $opt_include_lang 
1180
                and not $Include_Language{$Language{$fh[$F+1]}{$f}};
1181
        next if $Language{$fh[$F+1]}{$f} eq "(unknown)";
1182
        next if $Exclude_Language{$Language{$fh[$F+1]}{$f}};
1183
        push @alignment, sprintf "  + %s ; %s\n", $f, $Language{$fh[$F+1]}{$f}
1184
            if $opt_diff_alignment;
1185
        ++$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'nFiles'}{'added'};
1186
        # Additionally, add contents of file $f to 
1187
        #        Delta_by_File{$f}{comment/blank/code}{'added'}
1188
        #        Delta_by_Language{$lang}{comment/blank/code}{'added'}
1189
        my ($all_line_count,
1190
            $blank_count   ,
1191
            $comment_count ,
1192
           ) = call_counter($f, $Language{$fh[$F+1]}{$f}, \@Errors);
1193
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'comment'}{'added'} +=
1194
            $comment_count;
1195
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'blank'}{'added'}   +=
1196
            $blank_count;
1197
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'code'}{'added'}    +=
1198
            $all_line_count - $blank_count - $comment_count;
1199
        $Delta_by_File{ $f }{'comment'}{'added'} = $comment_count;
1200
        $Delta_by_File{ $f }{'blank'}{'added'}   = $blank_count;
1201
        $Delta_by_File{ $f }{'code'}{'added'}    = 
1202
            $all_line_count - $blank_count - $comment_count;
1203
    }
1204
    push @alignment, "\n";
1205
 
1206
#print "removed:\n";
1207
    push @alignment, sprintf "Files removed: %d\n", scalar @files_removed
1208
        if $opt_diff_alignment;
1209
    foreach my $f (@files_removed) {
1210
        next if $already_counted{$f};
1211
        # Don't proceed unless the file (both L and R versions)
1212
        # is in a known language.
1213
        next if $opt_include_lang 
1214
                and not $Include_Language{$Language{$fh[$F]}{$f}};
1215
        next if $Language{$fh[$F]}{$f} eq "(unknown)";
1216
        next if $Exclude_Language{$Language{$fh[$F]}{$f}};
1217
        ++$Delta_by_Language{ $Language{$fh[$F]}{$f} }{'nFiles'}{'removed'};
1218
        push @alignment, sprintf "  - %s ; %s\n", $f, $Language{$fh[$F]}{$f}
1219
            if $opt_diff_alignment;
1220
#printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
1221
        # Additionally, add contents of file $f to 
1222
        #        Delta_by_File{$f}{comment/blank/code}{'removed'}
1223
        #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
1224
        my ($all_line_count,
1225
            $blank_count   ,
1226
            $comment_count ,
1227
           ) = call_counter($f, $Language{$fh[$F  ]}{$f}, \@Errors);
1228
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'comment'}{'removed'} +=
1229
            $comment_count;
1230
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'blank'}{'removed'}   +=
1231
            $blank_count;
1232
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'code'}{'removed'}    +=
1233
            $all_line_count - $blank_count - $comment_count;
1234
        $Delta_by_File{ $f }{'comment'}{'removed'} = $comment_count;
1235
        $Delta_by_File{ $f }{'blank'}{'removed'}   = $blank_count;
1236
        $Delta_by_File{ $f }{'code'}{'removed'}    = 
1237
            $all_line_count - $blank_count - $comment_count;
1238
    }
1239
    push @alignment, "\n";
1240
 
1241
    my $alignment_pairs_index = scalar @alignment;
1242
    my $n_file_pairs_compared = 0;
1243
    # Don't know ahead of time how many file pairs will be compared
1244
    # since duplicates are weeded out below.  The answer is
1245
    # scalar @file_pairs only if there are no duplicates.
1246
    push @alignment, sprintf "File pairs compared: UPDATE_ME\n"
1247
        if $opt_diff_alignment;
1248
 
1249
    foreach my $pair (@file_pairs) {
1250
        my $file_L = $pair->[0];
1251
        my $file_R = $pair->[1];
1252
        my $Lang_L = $Language{$fh[$F  ]}{$file_L};
1253
        my $Lang_R = $Language{$fh[$F+1]}{$file_R};
1254
#print "main step 6 file_L=$file_L    file_R=$file_R\n";
1255
        ++$nCounted;
1256
        printf "Counting:  %d\r", $nCounted 
1257
            unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
1258
        next if $Ignored{$file_L};
1259
        # filter out non-included languages
1260
        if ($opt_include_lang and not $Include_Language{$Lang_L} 
1261
                              and not $Include_Language{$Lang_R}) {
1262
            $Ignored{$file_L} = "--include-lang=$Lang_L";
1263
            $Ignored{$file_R} = "--include-lang=$Lang_R";
1264
            next;
1265
        }
1266
        # filter out excluded or unrecognized languages
1267
        if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
1268
            $Ignored{$file_L} = "--exclude-lang=$Lang_L";
1269
            $Ignored{$file_R} = "--exclude-lang=$Lang_R";
1270
            next;
1271
        }
1272
        my $not_Filters_by_Language_Lang_LR = 0;
1273
#print "file_LR = [$file_L] [$file_R]\n";
1274
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
1275
        if (!(@{$Filters_by_Language{$Lang_L} }) or
1276
            !(@{$Filters_by_Language{$Lang_R} })) {
1277
            $not_Filters_by_Language_Lang_LR = 1;
1278
        }
1279
        if ($not_Filters_by_Language_Lang_LR) {
1280
            if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
1281
                $Ignored{$fh[$F  ]}{$file_L} = "language unknown (#1)";
1282
                $Ignored{$fh[$F+1]}{$file_R} = "language unknown (#1)";
1283
            } else {
1284
                $Ignored{$fh[$F  ]}{$file_L} = "missing Filters_by_Language{$Lang_L}";
1285
                $Ignored{$fh[$F+1]}{$file_R} = "missing Filters_by_Language{$Lang_R}";
1286
            }
1287
            next;
1288
        }
1289
 
1290
#print "DIFF($file_L, $file_R)\n";
1291
        # step 0: compare the two files' contents 
1292
        chomp ( my @lines_L = read_file($file_L) );
1293
        chomp ( my @lines_R = read_file($file_R) );
1294
        my $language_file_L = "";
1295
        if (defined $Language{$fh[$F]}{$file_L}) {
1296
            $language_file_L = $Language{$fh[$F]}{$file_L};
1297
        } else {
1298
            # files $file_L and $file_R do not contain known language
1299
            next;
1300
        }
1301
        my $contents_are_same = 1;
1302
        if (scalar @lines_L == scalar @lines_R) {
1303
            # same size, must compare line-by-line
1304
            for (my $i = 0; $i < scalar @lines_L; $i++) {
1305
                if ($lines_L[$i] ne $lines_R[$i]) {
1306
                    $contents_are_same = 0;
1307
                    last;
1308
                }
1309
            }
1310
            if ($contents_are_same) {
1311
                ++$Delta_by_Language{$language_file_L}{'nFiles'}{'same'};
1312
            } else {
1313
                ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
1314
            }
1315
        } else {
1316
            $contents_are_same = 0;
1317
            # different sizes, contents have changed
1318
            ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
1319
        }
1320
        if ($opt_diff_alignment) {
1321
            my $str =  "$file_L | $file_R ; $language_file_L";
1322
            if ($contents_are_same) {
1323
                push @alignment, "  == $str";
1324
            } else {
1325
                push @alignment, "  != $str";
1326
            }
1327
            ++$n_file_pairs_compared;
1328
        }
1329
 
1330
        # step 1: identify comments in both files
1331
#print "Diff blank removal L language= $Lang_L";
1332
#print " scalar(lines_L)=", scalar @lines_L, "\n";
1333
        my @original_minus_blanks_L 
1334
                    = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
1335
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
1336
        @lines_L    = @original_minus_blanks_L;
1337
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
1338
        @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
1339
        @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
1340
                                   \%EOL_Continuation_re);
1341
#print "3: scalar(lines_L)=", scalar @lines_L, "\n";
1342
 
1343
#print "Diff blank removal R language= $Lang_R\n";
1344
        my @original_minus_blanks_R 
1345
                    = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
1346
        @lines_R    = @original_minus_blanks_R;
1347
        @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
1348
        @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
1349
                                   \%EOL_Continuation_re);
1350
 
1351
        my (@diff_LL, @diff_LR, );
1352
        array_diff( $file_L                  ,   # in
1353
                   \@original_minus_blanks_L ,   # in
1354
                   \@lines_L                 ,   # in
1355
                   "comment"                 ,   # in
1356
                   \@diff_LL, \@diff_LR      ,   # out
1357
                   \@Errors);                    # in/out
1358
 
1359
        my (@diff_RL, @diff_RR, );
1360
        array_diff( $file_R                  ,   # in
1361
                   \@original_minus_blanks_R ,   # in
1362
                   \@lines_R                 ,   # in
1363
                   "comment"                 ,   # in
1364
                   \@diff_RL, \@diff_RR      ,   # out
1365
                   \@Errors);                    # in/out
1366
        # each line of each file is now classified as
1367
        # code or comment
1368
 
1369
#use Data::Dumper; 
1370
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
1371
#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
1372
#die;
1373
        # step 2: separate code from comments for L and R files
1374
        my @code_L = ();
1375
        my @code_R = ();
1376
        my @comm_L = ();
1377
        my @comm_R = ();
1378
        foreach my $line_info (@diff_LL) {
1379
            if      ($line_info->{'type'} eq "code"   ) {
1380
                push @code_L, $line_info->{char};
1381
            } elsif ($line_info->{'type'} eq "comment") {
1382
                push @comm_L, $line_info->{char};
1383
            } else {
1384
                die "Diff unexpected line type ",
1385
                    $line_info->{'type'}, "for $file_L line ",
1386
                    $line_info->{'lnum'};
1387
            }
1388
        }
1389
        foreach my $line_info (@diff_RL) {
1390
            if      ($line_info->{type} eq "code"   ) {
1391
                push @code_R, $line_info->{'char'};
1392
            } elsif ($line_info->{type} eq "comment") {
1393
                push @comm_R, $line_info->{'char'};
1394
            } else {
1395
                die "Diff unexpected line type ",
1396
                    $line_info->{'type'}, "for $file_R line ",
1397
                    $line_info->{'lnum'};
1398
            }
1399
        }
1400
 
1401
        if ($opt_ignore_whitespace) {
1402
            # strip all whitespace from each line of source code
1403
            # and comments then use these stripped arrays in diffs
1404
            foreach (@code_L) { s/\s+//g }
1405
            foreach (@code_R) { s/\s+//g }
1406
            foreach (@comm_L) { s/\s+//g }
1407
            foreach (@comm_R) { s/\s+//g }
1408
        }
1409
        if ($opt_ignore_case) {
1410
            # change all text to lowercase in diffs
1411
            foreach (@code_L) { $_ = lc }
1412
            foreach (@code_R) { $_ = lc }
1413
            foreach (@comm_L) { $_ = lc }
1414
            foreach (@comm_R) { $_ = lc }
1415
        }
1416
        # step 3: compute code diffs
1417
        array_diff("$file_L v. $file_R"   ,   # in
1418
                   \@code_L               ,   # in
1419
                   \@code_R               ,   # in
1420
                   "revision"             ,   # in
1421
                   \@diff_LL, \@diff_LR   ,   # out
1422
                   \@Errors);                 # in/out
1423
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
1424
#print Dumper("diff_LR", \@diff_LR);
1425
        foreach my $line_info (@diff_LR) {
1426
            my $status = $line_info->{'desc'}; # same|added|removed|modified
1427
            ++$Delta_by_Language{$Lang_L}{'code'}{$status};
1428
            if ($opt_by_file) {
1429
                ++$Delta_by_File{$file_L}{'code'}{$status};
1430
            }
1431
        }
1432
#use Data::Dumper;
1433
#print Dumper("code diffs:", \@diff_LL, \@diff_LR);
1434
 
1435
        # step 4: compute comment diffs
1436
        array_diff("$file_L v. $file_R"   ,   # in
1437
                   \@comm_L               ,   # in
1438
                   \@comm_R               ,   # in
1439
                   "revision"             ,   # in
1440
                   \@diff_LL, \@diff_LR   ,   # out
1441
                   \@Errors);                 # in/out
1442
#print Dumper("comment diff_LR", \@diff_LR);
1443
        foreach my $line_info (@diff_LR) {
1444
            my $status = $line_info->{'desc'}; # same|added|removed|modified
1445
            ++$Delta_by_Language{$Lang_L}{'comment'}{$status};
1446
            if ($opt_by_file) {
1447
                ++$Delta_by_File{$file_L}{'comment'}{$status};
1448
            }
1449
        }
1450
#print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
1451
#die; here=  need to save original line number in diff result for html display
1452
 
1453
        # step 5: compute difference in blank lines (kind of pointless)
1454
        next if $Lang_L eq '(unknown)' or 
1455
                $Lang_R eq '(unknown)';
1456
        my ($all_line_count_L,
1457
            $blank_count_L   ,
1458
            $comment_count_L ,
1459
           ) = call_counter($file_L, $Lang_L, \@Errors);
1460
 
1461
        my ($all_line_count_R,
1462
            $blank_count_R   ,
1463
            $comment_count_R ,
1464
           ) = call_counter($file_R, $Lang_R, \@Errors);
1465
 
1466
        if ($blank_count_L <  $blank_count_R) {
1467
            my $D = $blank_count_R - $blank_count_L;
1468
            $Delta_by_Language{$Lang_L}{'blank'}{'added'}   += $D;
1469
        } else {
1470
            my $D = $blank_count_L - $blank_count_R;
1471
            $Delta_by_Language{$Lang_L}{'blank'}{'removed'} += $D;
1472
        }
1473
        if ($opt_by_file) {
1474
            if ($blank_count_L <  $blank_count_R) {
1475
                my $D = $blank_count_R - $blank_count_L;
1476
                $Delta_by_File{$file_L}{'blank'}{'added'}   += $D;
1477
            } else {
1478
                my $D = $blank_count_L - $blank_count_R;
1479
                $Delta_by_File{$file_L}{'blank'}{'removed'} += $D;
1480
            }
1481
        }
1482
 
1483
        my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
1484
        if ($opt_by_file) {
1485
            $Results_by_File{$file_L}{'code'   } = $code_count_L    ;
1486
            $Results_by_File{$file_L}{'blank'  } = $blank_count_L   ;
1487
            $Results_by_File{$file_L}{'comment'} = $comment_count_L ;
1488
            $Results_by_File{$file_L}{'lang'   } = $Lang_L          ;
1489
            $Results_by_File{$file_L}{'nFiles' } = 1                ;
1490
        } else {
1491
            $Results_by_File{$file_L} = 1;  # just keep track of counted files
1492
        }
1493
 
1494
        $Results_by_Language{$Lang_L}{'nFiles'}++;
1495
        $Results_by_Language{$Lang_L}{'code'}    += $code_count_L   ;
1496
        $Results_by_Language{$Lang_L}{'blank'}   += $blank_count_L  ;
1497
        $Results_by_Language{$Lang_L}{'comment'} += $comment_count_L;
1498
    }
1499
    if ($opt_diff_alignment) {
1500
        $alignment[$alignment_pairs_index] =~ s/UPDATE_ME/$n_file_pairs_compared/;
1501
        write_file($opt_diff_alignment, @alignment);
1502
    }
1503
 
1504
}
1505
#use Data::Dumper;
1506
#print Dumper("Delta_by_Language:"  , \%Delta_by_Language);
1507
#print Dumper("Results_by_Language:", \%Results_by_Language);
1508
#print Dumper("Delta_by_File:"      , \%Delta_by_File);
1509
#print Dumper("Results_by_File:"    , \%Results_by_File);
1510
#die;
1511
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
1512
write_file($opt_ignored, @ignored_reasons   ) if $opt_ignored;
1513
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
1514
# 1}}}
1515
# Step 7:  Assemble results.                   {{{1
1516
#
1517
my $end_time = get_time();
1518
printf "%8d file%s ignored.                           \n", 
1519
    plural_form(scalar keys %Ignored) unless $opt_quiet;
1520
print_errors(\%Error_Codes, \@Errors) if @Errors;
1521
if (!%Delta_by_Language) {
1522
    print "Nothing to count.\n";
1523
    exit;
1524
}
1525
 
1526
if ($opt_by_file) {
1527
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
1528
                            "by file",
1529
                            \%Delta_by_File, \%Scale_Factor);
1530
} else {
1531
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
1532
                            "by language",
1533
                            \%Delta_by_Language, \%Scale_Factor);
1534
}
1535
 
1536
# 1}}}
1537
} else {
1538
# Step 4:  Separate code from non-code files.  {{{1
1539
my $fh = 0;
1540
if ($opt_list_file) {
1541
    my @list = read_list_file($opt_list_file);
1542
    $fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
1543
} else {
1544
    $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
1545
    #     make_file_list populates global variable @file_list via call to 
1546
    #     File::Find's find() which in turn calls files()
1547
}
1548
if ($opt_exclude_list_file) {
1549
    # note: process_exclude_list_file() references global @file_list
1550
    process_exclude_list_file($opt_exclude_list_file, 
1551
                             \%Exclude_Dir,
1552
                             \%Ignored);
1553
}
1554
if ($opt_skip_win_hidden and $ON_WINDOWS) {
1555
    my @file_list_minus_hidded = ();
1556
    # eval code to run on Unix without 'missing Win32::File module' error.
1557
    my $win32_file_invocation = '
1558
        use Win32::File;
1559
        foreach my $F (@file_list) {
1560
            my $attr = undef;
1561
            Win32::File::GetAttributes($F, $attr);
1562
            if ($attr & HIDDEN) {
1563
                $Ignored{$F} = "Windows hidden file";
1564
                print "Ignoring $F since it is a Windows hidden file\n" 
1565
                    if $opt_v > 1;
1566
            } else {
1567
                push @file_list_minus_hidded, $F;
1568
            }
1569
        }';
1570
    eval $win32_file_invocation;
1571
    @file_list = @file_list_minus_hidded;
1572
}
1573
#printf "%8d file%s excluded.                     \n", 
1574
#   plural_form(scalar keys %Ignored) 
1575
#   unless $opt_quiet;
1576
# die print ": ", join("\n: ", @file_list), "\n";
1577
# 1}}}
1578
# Step 5:  Remove duplicate files.             {{{1
1579
#
1580
my %Language           = ();
1581
my %unique_source_file = ();
1582
remove_duplicate_files($fh                          ,   # in 
1583
                           \%Language               ,   # out
1584
                           \%unique_source_file     ,   # out
1585
                      \%Error_Codes                 ,   # in
1586
                           \@Errors                 ,   # out
1587
                           \%Ignored                );  # out
1588
printf "%8d unique file%s.                              \n", 
1589
    plural_form(scalar keys %unique_source_file) 
1590
    unless $opt_quiet;
1591
# 1}}}
1592
# Step 6:  Count code, comments, blank lines.  {{{1
1593
#
1594
 
1595
my %Results_by_Language = ();
1596
my %Results_by_File     = ();
1597
my $nCounted = 0;
1598
foreach my $file (sort keys %unique_source_file) {
1599
    ++$nCounted;
1600
    printf "Counting:  %d\r", $nCounted 
1601
        unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
1602
    next if $Ignored{$file};
1603
    if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
1604
        $Ignored{$file} = "--include-lang=$Language{$file}";
1605
        next;
1606
    }
1607
    if ($Exclude_Language{$Language{$file}}) {
1608
        $Ignored{$file} = "--exclude-lang=$Language{$file}";
1609
        next;
1610
    }
1611
    my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
1612
    if ($Filters_by_Language_Language_file) {
1613
        if ($Language{$file} eq "(unknown)") {
1614
            $Ignored{$file} = "language unknown (#1)";
1615
        } else {
1616
            $Ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
1617
        }
1618
        next;
1619
    }
1620
 
1621
    my ($all_line_count,
1622
        $blank_count   ,
1623
        $comment_count ,
1624
       ) = call_counter($file, $Language{$file}, \@Errors);
1625
    my $code_count = $all_line_count - $blank_count - $comment_count;
1626
    if ($opt_by_file) {
1627
        $Results_by_File{$file}{'code'   } = $code_count     ;
1628
        $Results_by_File{$file}{'blank'  } = $blank_count    ;
1629
        $Results_by_File{$file}{'comment'} = $comment_count  ;
1630
        $Results_by_File{$file}{'lang'   } = $Language{$file};
1631
        $Results_by_File{$file}{'nFiles' } = 1;
1632
    } else {
1633
        $Results_by_File{$file} = 1;  # just keep track of counted files
1634
    }
1635
 
1636
    $Results_by_Language{$Language{$file}}{'nFiles'}++;
1637
    $Results_by_Language{$Language{$file}}{'code'}    += $code_count   ;
1638
    $Results_by_Language{$Language{$file}}{'blank'}   += $blank_count  ;
1639
    $Results_by_Language{$Language{$file}}{'comment'} += $comment_count;
1640
}
1641
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
1642
write_file($opt_ignored, @ignored_reasons   ) if $opt_ignored;
1643
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
1644
# 1}}}
1645
# Step 7:  Assemble results.                   {{{1
1646
#
1647
my $end_time = get_time();
1648
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) 
1649
    unless $opt_quiet;
1650
print_errors(\%Error_Codes, \@Errors) if @Errors;
1651
exit unless %Results_by_Language;
1652
 
1653
generate_sql($end_time - $start_time,
1654
            \%Results_by_File, \%Scale_Factor) if $opt_sql;
1655
 
1656
exit if $skip_generate_report;
1657
if      ($opt_by_file_by_lang) {
1658
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1659
                                      "by file",
1660
                                      \%Results_by_File,    \%Scale_Factor);
1661
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1662
                                      "by language",
1663
                                      \%Results_by_Language, \%Scale_Factor);
1664
} elsif ($opt_by_file) {
1665
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1666
                                      "by file",
1667
                                      \%Results_by_File,    \%Scale_Factor);
1668
} else {
1669
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1670
                                      "by language",
1671
                                      \%Results_by_Language, \%Scale_Factor);
1672
}
1673
# 1}}}
1674
}
1675
if ($opt_report_file) { write_file($opt_report_file, @Lines_Out); } 
1676
else                  { print "\n", join("\n", @Lines_Out), "\n"; }
1677
if ($opt_count_diff) {
1678
    ++$opt_count_diff;
1679
    exit if $opt_count_diff > 3;
1680
    goto Top_of_Processing_Loop;
1681
}
1682
 
1683
sub process_exclude_list_file {              # {{{1
1684
    my ($list_file      , # in
1685
        $rh_exclude_dir , # out
1686
        $rh_ignored     , # out
1687
       ) = @_;
1688
    # note: references global @file_list
1689
    print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
1690
    # reject a specific set of files and/or directories
1691
    my @reject_list   = read_list_file($list_file);
1692
    my @file_reject_list = ();
1693
    foreach my $F_or_D (@reject_list) {
1694
        if (is_dir($F_or_D)) {
1695
            $rh_exclude_dir->{$F_or_D} = 1;
1696
        } elsif (is_file($F_or_D)) {
1697
            push @file_reject_list, $F_or_D;
1698
        }
1699
    }
1700
 
1701
    # Normalize file names for better comparison.
1702
    my %normalized_input   = normalize_file_names(@file_list);
1703
    my %normalized_reject  = normalize_file_names(@file_reject_list);
1704
    my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
1705
    foreach my $F (keys %normalized_input) {
1706
        if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
1707
            my $orig_F = $normalized_input{$F};
1708
            $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
1709
            print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n" 
1710
                if $opt_v > 1;
1711
        }
1712
    }
1713
    print "<- process_exclude_list_file\n" if $opt_v > 2;
1714
} # 1}}}
1715
sub combine_results {                        # {{{1
1716
    # returns 1 if the inputs are categorized by language
1717
    #         0 if no identifiable language was found
1718
    my ($ra_report_files, # in
1719
        $report_type    , # in  "by language" or "by report file"
1720
        $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
1721
        $rhaa_Filters_by_Language , # in
1722
       ) = @_;
1723
 
1724
    print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
1725
    my $found_language = 0;
1726
 
1727
    foreach my $file (@{$ra_report_files}) {
1728
        my $IN = new IO::File $file, "r";
1729
        if (!defined $IN) {
1730
            warn "Unable to read $file; ignoring.\n";
1731
            next;
1732
        }
1733
        while (<$IN>) {
1734
            next if /^(http|Language|SUM|-----)/;
1735
            if (!$opt_by_file  and
1736
                m{^(.*?)\s+         # language
1737
                   (\d+)\s+         # files
1738
                   (\d+)\s+         # blank
1739
                   (\d+)\s+         # comments
1740
                   (\d+)\s+         # code
1741
                   (                #    next four entries missing with -nno3
1742
                   x\s+             # x
1743
                   \d+\.\d+\s+      # scale
1744
                   =\s+             # =
1745
                   (\d+\.\d+)\s*    # scaled code
1746
                   )?
1747
                   $}x) {
1748
                if ($report_type eq "by language") {
1749
                    next unless @{$rhaa_Filters_by_Language->{$1}};
1750
                    # above test necessary to avoid trying to sum reports
1751
                    # of reports (which have no language breakdown).
1752
                    $found_language = 1;
1753
                    $rhh_count->{$1   }{'nFiles' } += $2;
1754
                    $rhh_count->{$1   }{'blank'  } += $3;
1755
                    $rhh_count->{$1   }{'comment'} += $4;
1756
                    $rhh_count->{$1   }{'code'   } += $5;
1757
                    $rhh_count->{$1   }{'scaled' } += $7 if $opt_3;
1758
                } else {
1759
                    $rhh_count->{$file}{'nFiles' } += $2;
1760
                    $rhh_count->{$file}{'blank'  } += $3;
1761
                    $rhh_count->{$file}{'comment'} += $4;
1762
                    $rhh_count->{$file}{'code'   } += $5;
1763
                    $rhh_count->{$file}{'scaled' } += $7 if $opt_3;
1764
                }
1765
            } elsif ($opt_by_file  and
1766
                m{^(.*?)\s+         # language
1767
                   (\d+)\s+         # blank
1768
                   (\d+)\s+         # comments
1769
                   (\d+)\s+         # code
1770
                   (                #    next four entries missing with -nno3
1771
                   x\s+             # x
1772
                   \d+\.\d+\s+      # scale
1773
                   =\s+             # =
1774
                   (\d+\.\d+)\s*    # scaled code
1775
                   )?
1776
                   $}x) {
1777
                if ($report_type eq "by language") {
1778
                    next unless %{$rhaa_Filters_by_Language->{$1}};
1779
                    # above test necessary to avoid trying to sum reports
1780
                    # of reports (which have no language breakdown).
1781
                    $found_language = 1;
1782
                    $rhh_count->{$1   }{'nFiles' } +=  1;
1783
                    $rhh_count->{$1   }{'blank'  } += $2;
1784
                    $rhh_count->{$1   }{'comment'} += $3;
1785
                    $rhh_count->{$1   }{'code'   } += $4;
1786
                    $rhh_count->{$1   }{'scaled' } += $6 if $opt_3;
1787
                } else {
1788
                    $rhh_count->{$file}{'nFiles' } +=  1;
1789
                    $rhh_count->{$file}{'blank'  } += $2;
1790
                    $rhh_count->{$file}{'comment'} += $3;
1791
                    $rhh_count->{$file}{'code'   } += $4;
1792
                    $rhh_count->{$file}{'scaled' } += $6 if $opt_3;
1793
                }
1794
            }
1795
        }
1796
    }
1797
    print "<- combine_results\n" if $opt_v > 2;
1798
    return $found_language;
1799
} # 1}}}
1800
sub compute_denominator {                    # {{{1
1801
    my ($method, $nCode, $nComment, $nBlank, ) = @_;
1802
    print "-> compute_denominator\n" if $opt_v > 2;
1803
    my %den        = ( "c" => $nCode );
1804
       $den{"cm"}  = $den{"c"}  + $nComment;
1805
       $den{"cmb"} = $den{"cm"} + $nBlank;
1806
       $den{"cb"}  = $den{"c"}  + $nBlank;
1807
 
1808
    print "<- compute_denominator\n" if $opt_v > 2;
1809
    return $den{ $method };
1810
} # 1}}}
1811
sub diff_report     {                        # {{{1
1812
    # returns an array of lines containing the results
1813
    print "-> diff_report\n" if $opt_v > 2;
1814
 
1815
    if ($opt_xml or $opt_yaml) {
1816
        print "<- diff_report\n" if $opt_v > 2;
1817
        return diff_xml_yaml_report(@_) 
1818
    } elsif ($opt_csv) {
1819
        print "<- diff_report\n" if $opt_v > 2;
1820
        return diff_csv_report(@_) 
1821
    }
1822
 
1823
    my ($version    , # in
1824
        $elapsed_sec, # in
1825
        $report_type, # in  "by language" | "by report file" | "by file"
1826
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
1827
        $rh_scale   , # in
1828
       ) = @_;
1829
 
1830
#use Data::Dumper;
1831
#print "diff_report: ", Dumper($rhhh_count), "\n";
1832
    my @results       = ();
1833
 
1834
    my $languages     = ();
1835
    my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
1836
    my $max_len       = 0;
1837
    foreach my $language (keys %{$rhhh_count}) {
1838
        foreach my $V (qw(nFiles blank comment code)) {
1839
            foreach my $S (qw(added same modified removed)) {
1840
                $rhhh_count->{$language}{$V}{$S} = 0 unless
1841
                    defined $rhhh_count->{$language}{$V}{$S};
1842
                $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
1843
            }
1844
        }
1845
        $max_len      = length($language) if length($language) > $max_len;
1846
    }
1847
    my $column_1_offset = 0;
1848
       $column_1_offset = $max_len - 17 if $max_len > 17;
1849
    $elapsed_sec = 0.5 unless $elapsed_sec;
1850
 
1851
    my $spacing_0 = 23;
1852
    my $spacing_1 = 13;
1853
    my $spacing_2 =  9;
1854
    my $spacing_3 = 17;
1855
    if (!$opt_3) {
1856
        $spacing_1 = 19;
1857
        $spacing_2 = 14;
1858
        $spacing_3 = 28;
1859
    }
1860
    $spacing_0 += $column_1_offset;
1861
    $spacing_1 += $column_1_offset;
1862
    $spacing_3 += $column_1_offset;
1863
    my %Format = (
1864
        '1' => { 'xml' => 'name="%s" ',
1865
                 'txt' => "\%-${spacing_0}s ",
1866
               },
1867
        '2' => { 'xml' => 'name="%s" ',
1868
                 'txt' => "\%-${spacing_3}s ",
1869
               },
1870
        '3' => { 'xml' => 'files_count="%d" ',
1871
                 'txt' => '%5d ',
1872
               },
1873
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
1874
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
1875
               },
1876
        '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
1877
                 'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
1878
               },
1879
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
1880
                 'txt' => ' x %6.2f = %14.2f',
1881
               },
1882
    );
1883
    my $Style = "txt";
1884
       $Style = "xml" if $opt_xml ;
1885
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
1886
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
1887
 
1888
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
1889
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) 
1890
            if (!$opt_3) and (68 + $column_1_offset) > 79;
1891
    my $data_line  = "";
1892
    my $first_column;
1893
    my $BY_LANGUAGE = 0;
1894
    my $BY_FILE     = 0;
1895
    if      ($report_type eq "by language") {
1896
        $first_column = "Language";
1897
        $BY_LANGUAGE  = 1;
1898
    } elsif ($report_type eq "by file")     {
1899
        $first_column = "File";
1900
        $BY_FILE      = 1;
1901
    } else {
1902
        $first_column = "Report File";
1903
    }
1904
 
1905
    my $header_line  = sprintf "%s v %s", $URL, $version;
1906
    my $sum_files    = 1;
1907
    my $sum_lines    = 1;
1908
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
1909
                        $elapsed_sec           ,
1910
                        $sum_files/$elapsed_sec,
1911
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports;
1912
    if ($Style eq "txt") {
1913
        push @results, output_header($header_line, $hyphen_line, $BY_FILE);
1914
    } elsif ($Style eq "csv") {
1915
        die "csv";
1916
    }
1917
 
1918
    # column headers
1919
    if (!$opt_3 and $BY_FILE) {
1920
        my $spacing_n = $spacing_1 - 11;
1921
        $data_line  = sprintf "%-${spacing_n}s" , $first_column;
1922
    } else {
1923
        $data_line  = sprintf "%-${spacing_1}s ", $first_column;
1924
    }
1925
    if ($BY_FILE) {
1926
        $data_line .= sprintf "%${spacing_2}s"   , ""     ;
1927
    } else {
1928
        $data_line .= sprintf "%${spacing_2}s "  , "files";
1929
    }
1930
    my $PCT_symbol = "";
1931
       $PCT_symbol = " \%" if $opt_by_percent;
1932
    $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
1933
        "blank${PCT_symbol}"         ,
1934
        "comment${PCT_symbol}"       ,
1935
        "code";
1936
 
1937
    if ($Style eq "txt") {
1938
        push @results, $data_line;
1939
        push @results, $hyphen_line;
1940
    }
1941
 
1942
####foreach my $lang_or_file (keys %{$rhhh_count}) {
1943
####    $rhhh_count->{$lang_or_file}{'code'} = 0 unless 
1944
####        defined $rhhh_count->{$lang_or_file}{'code'};
1945
####}
1946
    foreach my $lang_or_file (sort {
1947
                                 $rhhh_count->{$b}{'code'} <=>
1948
                                 $rhhh_count->{$a}{'code'}
1949
                               }
1950
                          keys %{$rhhh_count}) {
1951
 
1952
        if ($BY_FILE) {
1953
            push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
1954
        } else {
1955
            push @results, $lang_or_file;
1956
        }
1957
        foreach my $S (qw(same modified added removed)) {
1958
            my $indent = $spacing_1 - 2;
1959
            my $line .= sprintf " %-${indent}s", $S;
1960
            if ($BY_FILE) {
1961
                $line .= sprintf "   ";
1962
            } else {
1963
                $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
1964
            }
1965
            if ($opt_by_percent) {
1966
                my $DEN = compute_denominator($opt_by_percent  ,
1967
                    $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
1968
                    $rhhh_count->{$lang_or_file}{'comment'}{$S},
1969
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
1970
                if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
1971
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
1972
                        $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
1973
                        $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
1974
                        $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1975
                } else {
1976
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
1977
                        0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1978
                }
1979
            } else {
1980
                $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
1981
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
1982
                    $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
1983
                    $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1984
            }
1985
            push @results, $line;
1986
        }
1987
    }
1988
    push @results, "-" x 79;
1989
    push @results, "SUM:";
1990
    foreach my $S (qw(same modified added removed)) {
1991
        my $indent = $spacing_1 - 2;
1992
        my $line .= sprintf " %-${indent}s", $S;
1993
            if ($BY_FILE) {
1994
                $line .= sprintf "   ";
1995
            } else {
1996
                $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
1997
            }
1998
        if ($opt_by_percent) {
1999
            my $DEN = compute_denominator($opt_by_percent,
2000
                $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
2001
            if ($sum{'code'}{$S} > 0) {
2002
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
2003
                    $sum{'blank'}{$S}   / $DEN * 100,
2004
                    $sum{'comment'}{$S} / $DEN * 100,
2005
                    $sum{'code'}{$S}    ;
2006
            } else {
2007
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
2008
                    0.0, 0.0, $sum{'code'}{$S}    ;
2009
            }
2010
        } else {
2011
            $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
2012
                $sum{'blank'}{$S}   ,
2013
                $sum{'comment'}{$S} ,
2014
                $sum{'code'}{$S}    ;
2015
        }
2016
        push @results, $line;
2017
    }
2018
    push @results, "-" x 79;
2019
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2020
    print "<- diff_report\n" if $opt_v > 2;
2021
 
2022
    return @results;
2023
} # 1}}}
2024
sub xml_or_yaml_header {                     # {{{1
2025
    my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
2026
    print "-> xml_or_yaml_header\n" if $opt_v > 2;
2027
    my $header      = "";
2028
    my $file_rate   = $sum_files/$elapsed_sec;
2029
    my $line_rate   = $sum_lines/$elapsed_sec;
2030
    my $type        = ""; 
2031
       $type        = "diff_" if $opt_diff;
2032
    my $report_file = "";
2033
    if ($opt_report_file) {
2034
        if ($opt_sum_reports) {
2035
            if ($by_file) {
2036
                $report_file = "  <report_file>$opt_report_file.file</report_file>"
2037
            } else {
2038
                $report_file = "  <report_file>$opt_report_file.lang</report_file>"
2039
            }
2040
        } else {
2041
            $report_file = "  <report_file>$opt_report_file</report_file>"
2042
        }
2043
    }
2044
    if ($opt_xml) {
2045
        $header = "<?xml version=\"1.0\"?>";
2046
        $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
2047
        $header .= "<${type}results>
2048
<header>
2049
  <cloc_url>$URL</cloc_url>
2050
  <cloc_version>$version</cloc_version>
2051
  <elapsed_seconds>$elapsed_sec</elapsed_seconds>
2052
  <n_files>$sum_files</n_files>
2053
  <n_lines>$sum_lines</n_lines>
2054
  <files_per_second>$file_rate</files_per_second>
2055
  <lines_per_second>$line_rate</lines_per_second>";
2056
        $header .= "\n$report_file"
2057
            if $opt_report_file;
2058
        $header .= "\n</header>";
2059
    } elsif ($opt_yaml) {
2060
        $header = "---\n# $URL
2061
header :
2062
  cloc_url           : http://cloc.sourceforge.net
2063
  cloc_version       : $version
2064
  elapsed_seconds    : $elapsed_sec
2065
  n_files            : $sum_files
2066
  n_lines            : $sum_lines
2067
  files_per_second   : $file_rate
2068
  lines_per_second   : $line_rate";
2069
        if ($opt_report_file) {
2070
            if ($opt_sum_reports) {
2071
                if ($by_file) {
2072
                    $header .= "\n  report_file        : $opt_report_file.file"
2073
                } else {
2074
                    $header .= "\n  report_file        : $opt_report_file.lang"
2075
                }
2076
            } else {
2077
                $header .= "\n  report_file        : $opt_report_file";
2078
            }
2079
        }
2080
    }
2081
    print "<- xml_or_yaml_header\n" if $opt_v > 2;
2082
    return $header;
2083
} # 1}}}
2084
sub diff_xml_yaml_report {                   # {{{1
2085
    # returns an array of lines containing the results
2086
    my ($version    , # in
2087
        $elapsed_sec, # in
2088
        $report_type, # in  "by language" | "by report file" | "by file"
2089
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
2090
        $rh_scale   , # in
2091
       ) = @_;
2092
    print "-> diff_xml_yaml_report\n" if $opt_v > 2;
2093
 
2094
#print "diff_report: ", Dumper($rhhh_count), "\n";
2095
    my @results       = ();
2096
 
2097
    my $languages     = ();
2098
    my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
2099
 
2100
    my $sum_files = 0;
2101
    my $sum_lines = 0;
2102
    foreach my $language (keys %{$rhhh_count}) {
2103
        foreach my $V (qw(nFiles blank comment code)) {
2104
            foreach my $S (qw(added same modified removed)) {
2105
                $rhhh_count->{$language}{$V}{$S} = 0 unless
2106
                    defined $rhhh_count->{$language}{$V}{$S};
2107
                $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
2108
                if ($V eq "nFiles") {
2109
                    $sum_files += $rhhh_count->{$language}{$V}{$S};
2110
                } else {
2111
                    $sum_lines += $rhhh_count->{$language}{$V}{$S};
2112
                }
2113
            }
2114
        }
2115
    }
2116
    $elapsed_sec = 0.5 unless $elapsed_sec;
2117
 
2118
    my $data_line   = "";
2119
    my $BY_LANGUAGE = 0;
2120
    my $BY_FILE     = 0;
2121
    if      ($report_type eq "by language") {
2122
        $BY_LANGUAGE  = 1;
2123
    } elsif ($report_type eq "by file")     {
2124
        $BY_FILE      = 1;
2125
    }
2126
 
2127
    if (!$ALREADY_SHOWED_HEADER) {
2128
        push @results,
2129
              xml_or_yaml_header($URL, $version, $elapsed_sec, 
2130
                                 $sum_files, $sum_lines, $BY_FILE);
2131
        $ALREADY_SHOWED_HEADER = 1;
2132
    }
2133
 
2134
    foreach my $S (qw(same modified added removed)) {
2135
        if ($opt_xml) {
2136
            push @results, "  <$S>";
2137
        } elsif ($opt_yaml) {
2138
            push @results, "$S :";
2139
        }
2140
########foreach my $lang_or_file (keys %{$rhhh_count}) {
2141
########    $rhhh_count->{$lang_or_file}{'code'} = 0 unless 
2142
########        defined $rhhh_count->{$lang_or_file}{'code'};
2143
########}
2144
        foreach my $lang_or_file (sort {
2145
                                     $rhhh_count->{$b}{'code'} <=>
2146
                                     $rhhh_count->{$a}{'code'}
2147
                                   }
2148
                              keys %{$rhhh_count}) {
2149
            my $L = "";
2150
            if ($opt_xml) {
2151
                if ($BY_FILE) {
2152
                    $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ", 
2153
                        xml_metachars(
2154
                            rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
2155
                } else {
2156
                    $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
2157
                            $lang_or_file ,
2158
                            $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
2159
                }
2160
                if ($opt_by_percent) {
2161
                  my $DEN = compute_denominator($opt_by_percent            ,
2162
                                $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
2163
                                $rhhh_count->{$lang_or_file}{'comment'}{$S},
2164
                                $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
2165
                  foreach my $T (qw(blank comment)) {
2166
                      if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
2167
                        $L .= sprintf "%s=\"%.2f\" ", 
2168
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
2169
                      } else {
2170
                        $L .= sprintf "%s=\"0.0\" ", $T;
2171
                      }
2172
                  }
2173
                  foreach my $T (qw(code)) {
2174
                      $L .= sprintf "%s=\"%d\" ", 
2175
                              $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2176
                  }
2177
                } else {
2178
                  foreach my $T (qw(blank comment code)) {
2179
                      $L .= sprintf "%s=\"%d\" ", 
2180
                              $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2181
                  }
2182
                }
2183
                push @results, $L . "/>";
2184
            } elsif ($opt_yaml) {
2185
                if ($BY_FILE) {
2186
                    push @results, sprintf "  - file : %s", 
2187
                                   rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2188
                    push @results, sprintf "    files_count : 1", 
2189
                } else {
2190
                    push @results, sprintf "  - language : %s", $lang_or_file;
2191
                    push @results, sprintf "    files_count : %d", 
2192
                            $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
2193
                }
2194
                if ($opt_by_percent) {
2195
                    my $DEN = compute_denominator($opt_by_percent  ,
2196
                        $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
2197
                        $rhhh_count->{$lang_or_file}{'comment'}{$S},
2198
                        $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
2199
                    foreach my $T (qw(blank comment)) {
2200
                        if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
2201
                            push @results, sprintf "    %s : %.2f", 
2202
                                    $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
2203
                        } else {
2204
                            push @results, sprintf "    %s : 0.0", $T;
2205
                        }
2206
                    }
2207
                    foreach my $T (qw(code)) {
2208
                        push @results, sprintf "    %s : %d", 
2209
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2210
                    }
2211
                } else {
2212
                    foreach my $T (qw(blank comment code)) {
2213
                        push @results, sprintf "    %s : %d", 
2214
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2215
                    }
2216
                }
2217
            }
2218
        }
2219
 
2220
        if ($opt_xml) {
2221
            my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
2222
            if ($opt_by_percent) {
2223
              my $DEN = compute_denominator($opt_by_percent,
2224
                            $sum{'code'}{$S}   , 
2225
                            $sum{'comment'}{$S}, 
2226
                            $sum{'blank'}{$S}  );
2227
              foreach my $V (qw(blank comment)) {
2228
                  if ($sum{'code'}{$S} > 0) {
2229
                      $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
2230
                  } else {
2231
                      $L .= sprintf "%s=\"0.0\" ", $V;
2232
                  }
2233
              }
2234
              foreach my $V (qw(code)) {
2235
                  $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
2236
              }
2237
            } else {
2238
              foreach my $V (qw(blank comment code)) {
2239
                  $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
2240
              }
2241
            }
2242
            push @results, $L . "/>";
2243
            push @results, "  </$S>";
2244
        } elsif ($opt_yaml) {
2245
            push @results, sprintf "%s_total :\n    sum_files : %d", 
2246
                                $S, $sum{'nFiles'}{$S};
2247
            if ($opt_by_percent) {
2248
                my $DEN = compute_denominator($opt_by_percent    ,
2249
                                              $sum{'code'}{$S}   ,
2250
                                              $sum{'comment'}{$S},
2251
                                              $sum{'blank'}{$S}  );
2252
                foreach my $V (qw(blank comment)) {
2253
                    if ($sum{'code'}{$S} > 0) {
2254
                        push @results, sprintf "    %s : %.2f", $V, $sum{$V}{$S} / $DEN * 100;
2255
                    } else {
2256
                        push @results, sprintf "    %s : 0.0", $V;
2257
                    }
2258
                }
2259
                foreach my $V (qw(code)) {
2260
                    push @results, sprintf "    %s : %d", $V, $sum{$V}{$S};
2261
                }
2262
            } else {
2263
                foreach my $V (qw(blank comment code)) {
2264
                    push @results, sprintf "    %s : %d", $V, $sum{$V}{$S};
2265
                }
2266
            }
2267
        }
2268
    }
2269
 
2270
    if ($opt_xml) {
2271
        push @results, "</diff_results>";
2272
    }
2273
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2274
    print "<- diff_xml_yaml_report\n" if $opt_v > 2;
2275
    return @results;
2276
} # 1}}}
2277
sub diff_csv_report {                        # {{{1
2278
    # returns an array of lines containing the results
2279
    my ($version    , # in
2280
        $elapsed_sec, # in
2281
        $report_type, # in  "by language" | "by report file" | "by file"
2282
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
2283
        $rh_scale   , # in  unused
2284
       ) = @_;
2285
    print "-> diff_csv_report\n" if $opt_v > 2;
2286
 
2287
#use Data::Dumper;
2288
#print "diff_csv_report: ", Dumper($rhhh_count), "\n";
2289
#die;
2290
    my @results       = ();
2291
    my $languages     = ();
2292
 
2293
    my $data_line   = "";
2294
    my $BY_LANGUAGE = 0;
2295
    my $BY_FILE     = 0;
2296
    if      ($report_type eq "by language") {
2297
        $BY_LANGUAGE  = 1;
2298
    } elsif ($report_type eq "by file")     {
2299
        $BY_FILE      = 1;
2300
    }
2301
    my $DELIM = ",";
2302
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
2303
 
2304
    $elapsed_sec = 0.5 unless $elapsed_sec;
2305
 
2306
    my $line = "Language${DELIM} ";
2307
       $line = "File${DELIM} " if $BY_FILE;
2308
    foreach my $item (qw(files blank comment code)) {
2309
        next if $BY_FILE and $item eq 'files';
2310
        foreach my $symbol ( '==', '!=', '+', '-', ) {
2311
            $line .= "$symbol $item${DELIM} ";
2312
        }
2313
    }
2314
    $line .= "\"$URL v $version T=$elapsed_sec s\"";
2315
    push @results, $line;
2316
 
2317
    foreach my $lang_or_file (keys %{$rhhh_count}) {
2318
        $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless 
2319
            defined $rhhh_count->{$lang_or_file}{'code'};
2320
    }
2321
    foreach my $lang_or_file (sort {
2322
                                 $rhhh_count->{$b}{'code'} <=>
2323
                                 $rhhh_count->{$a}{'code'}
2324
                               }
2325
                          keys %{$rhhh_count}) {
2326
        if ($BY_FILE) {
2327
            $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
2328
        } else {
2329
            $line = $lang_or_file . "${DELIM} ";
2330
        }
2331
        if ($opt_by_percent) {
2332
          foreach my $item (qw(nFiles)) {
2333
              next if $BY_FILE and $item eq 'nFiles';
2334
              foreach my $symbol (qw(same modified added removed)) {
2335
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2336
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2337
                  } else {
2338
                      $line .= "0${DELIM} ";
2339
                  }
2340
              }
2341
          }
2342
          foreach my $item (qw(blank comment)) {
2343
              foreach my $symbol (qw(same modified added removed)) {
2344
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
2345
                      defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
2346
                      $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
2347
                      $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
2348
                  } else {
2349
                      $line .= "0.00${DELIM} ";
2350
                  }
2351
              }
2352
          }
2353
          foreach my $item (qw(code)) {
2354
              foreach my $symbol (qw(same modified added removed)) {
2355
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2356
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2357
                  } else {
2358
                      $line .= "0${DELIM} ";
2359
                  }
2360
              }
2361
          }
2362
        } else {
2363
          foreach my $item (qw(nFiles blank comment code)) {
2364
              next if $BY_FILE and $item eq 'nFiles';
2365
              foreach my $symbol (qw(same modified added removed)) {
2366
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2367
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2368
                  } else {
2369
                      $line .= "0${DELIM} ";
2370
                  }
2371
              }
2372
          }
2373
        }
2374
        push @results, $line;
2375
    }
2376
 
2377
    print "<- diff_csv_report\n" if $opt_v > 2;
2378
    return @results;
2379
} # 1}}}
2380
sub rm_leading_tempdir {                     # {{{1
2381
    my ($in_file, $rh_temp_dirs, ) = @_;
2382
    my $clean_filename = $in_file;
2383
    foreach my $temp_d (keys %{$rh_temp_dirs}) {
2384
        if ($ON_WINDOWS) {
2385
        # \ -> / necessary to allow the next if test's
2386
        # m{} to work in the presence of spaces in file names
2387
            $temp_d         =~ s{\\}{/}g;
2388
            $clean_filename =~ s{\\}{/}g;
2389
        }
2390
        if ($clean_filename =~ m{^$temp_d/}) {
2391
            $clean_filename =~ s{^$temp_d/}{};
2392
            last;
2393
        }
2394
    }
2395
    $clean_filename =~ s{/}{\\}g if $ON_WINDOWS; # then go back from / to \
2396
    return $clean_filename;
2397
} # 1}}}
2398
sub generate_sql    {                        # {{{1
2399
    my ($elapsed_sec, # in
2400
        $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
2401
        $rh_scale   , # in
2402
       ) = @_;
2403
    print "-> generate_sql\n" if $opt_v > 2;
2404
 
2405
    $opt_sql_project = cwd() unless defined $opt_sql_project;
2406
    $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
2407
 
2408
    my $schema = undef;
2409
    if ($opt_sql_style eq "oracle") {
2410
        $schema = "
2411
CREATE TABLE metadata
2412
(
2413
  timestamp   TIMESTAMP,
2414
  project     VARCHAR2(500 CHAR),
2415
  elapsed_s   NUMBER(10, 6)
2416
) 
2417
/
2418
 
2419
CREATE TABLE t
2420
( 
2421
  project        VARCHAR2(500 CHAR),
2422
  language       VARCHAR2(500 CHAR),
2423
  file_fullname  VARCHAR2(500 CHAR),
2424
  file_dirname   VARCHAR2(500 CHAR),
2425
  file_basename  VARCHAR2(500 CHAR),
2426
  nblank         INTEGER,
2427
  ncomment       INTEGER,
2428
  ncode          INTEGER,
2429
  nscaled        NUMBER(10, 6)
2430
)
2431
/
2432
 
2433
";
2434
    } else {
2435
        $schema = "
2436
create table metadata (          -- $URL v $VERSION
2437
                timestamp varchar(500),    
2438
                Project   varchar(500),    
2439
                elapsed_s real);   
2440
create table t        (
2441
                Project       varchar(500)   ,  
2442
                Language      varchar(500)   ,  
2443
                File          varchar(500)   ,  
2444
                File_dirname  varchar(500)   ,  
2445
                File_basename varchar(500)   ,  
2446
                nBlank        integer        ,  
2447
                nComment      integer        ,  
2448
                nCode         integer        ,  
2449
                nScaled       real           ); 
2450
";
2451
    }
2452
    $opt_sql = "-" if $opt_sql eq "1";
2453
 
2454
    my $open_mode = ">";
2455
       $open_mode = ">>" if $opt_sql_append;
2456
 
2457
    my $fh = new IO::File; # $opt_sql, "w";
2458
    if (!$fh->open("${open_mode}${opt_sql}")) {
2459
        die "Unable to write to $opt_sql  $!\n";
2460
    }
2461
    print $fh $schema unless defined $opt_sql_append;
2462
 
2463
    if ($opt_sql_style eq "oracle") {
2464
        printf $fh "insert into metadata values(TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
2465
                    strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
2466
                    $opt_sql_project, $elapsed_sec;
2467
    } else {
2468
        print $fh "begin transaction;\n";
2469
        printf $fh "insert into metadata values('%s', '%s', %f);\n",
2470
                    strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
2471
                    $opt_sql_project, $elapsed_sec;
2472
    }
2473
 
2474
    my $nIns = 0;
2475
    foreach my $file (keys %{$rhh_count}) {
2476
        my $language = $rhh_count->{$file}{'lang'};
2477
        my $clean_filename = $file;
2478
        # If necessary (that is, if the input contained an
2479
        # archive file [.tar.gz, etc]), strip the temporary
2480
        # directory name which was used to expand the archive
2481
        # from the file name.
2482
 
2483
        $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
2484
        $clean_filename =~ s/\'/''/g;  # double embedded single quotes
2485
                                       # to escape them 
2486
 
2487
        printf $fh "insert into t values('%s', '%s', '%s', '%s', '%s', " .
2488
                   "%d, %d, %d, %f);\n",
2489
                    $opt_sql_project           ,
2490
                    $language                  ,
2491
                    $clean_filename            ,
2492
                    dirname( $clean_filename)  ,
2493
                    basename($clean_filename)  ,
2494
                    $rhh_count->{$file}{'blank'},
2495
                    $rhh_count->{$file}{'comment'},
2496
                    $rhh_count->{$file}{'code'}   ,
2497
                    $rhh_count->{$file}{'code'}*$rh_scale->{$language};
2498
 
2499
        ++$nIns;
2500
        if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
2501
            print $fh "commit;\n";
2502
            print $fh "begin transaction;\n";
2503
        }
2504
    }
2505
    if ($opt_sql_style ne "oracle") {
2506
        print $fh "commit;\n";
2507
    }
2508
 
2509
    $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
2510
    print "<- generate_sql\n" if $opt_v > 2;
2511
 
2512
    # sample query:
2513
    #
2514
    #   select project, language, 
2515
    #          sum(nCode)     as Code, 
2516
    #          sum(nComment)  as Comments, 
2517
    #          sum(nBlank)    as Blank,  
2518
    #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
2519
    #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
2520
    #          from t group by Project, Language order by Project, Code desc;
2521
    #
2522
} # 1}}}
2523
sub output_header   {                        # {{{1
2524
    my ($header_line, 
2525
        $hyphen_line,
2526
        $BY_FILE    ,)    = @_;
2527
    print "-> output_header\n" if $opt_v > 2;
2528
    my @R = ();
2529
    if      ($opt_xml) {
2530
        if (!$ALREADY_SHOWED_XML_SECTION) {
2531
            push @R, "<?xml version=\"1.0\"?>";
2532
            push @R, '<?xml-stylesheet type="text/xsl" href="' .
2533
                            $opt_xsl . '"?>' if $opt_xsl;
2534
            push @R, "<results>";
2535
            push @R, "<header>$header_line</header>";
2536
            $ALREADY_SHOWED_XML_SECTION = 1;
2537
        }
2538
        if ($BY_FILE) {
2539
            push @R, "<files>";
2540
        } else {
2541
            push @R, "<languages>";
2542
        }
2543
    } elsif ($opt_yaml) {
2544
        push @R, "---\n# $header_line";
2545
    } elsif ($opt_csv) {
2546
        # append the header to the end of the column headers
2547
        # to keep the output a bit cleaner from a spreadsheet
2548
        # perspective
2549
    } else {
2550
        if ($ALREADY_SHOWED_HEADER) {
2551
            push @R, "";
2552
        } else {
2553
            push @R, $header_line;
2554
            $ALREADY_SHOWED_HEADER = 1;
2555
        }
2556
        push @R, $hyphen_line;
2557
    }
2558
    print "<- output_header\n" if $opt_v > 2;
2559
    return @R;
2560
} # 1}}}
2561
sub generate_report {                        # {{{1
2562
    # returns an array of lines containing the results
2563
    my ($version    , # in
2564
        $elapsed_sec, # in
2565
        $report_type, # in  "by language" | "by report file" | "by file"
2566
        $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
2567
        $rh_scale   , # in
2568
       ) = @_;
2569
 
2570
    print "-> generate_report\n" if $opt_v > 2;
2571
    my $DELIM = ",";
2572
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
2573
 
2574
    my @results       = ();
2575
 
2576
    my $languages     = ();
2577
 
2578
    my $sum_files     = 0;
2579
    my $sum_code      = 0;
2580
    my $sum_blank     = 0;
2581
    my $sum_comment   = 0;
2582
    my $max_len       = 0;
2583
    foreach my $language (keys %{$rhh_count}) {
2584
        $sum_files   += $rhh_count->{$language}{'nFiles'} ;
2585
        $sum_blank   += $rhh_count->{$language}{'blank'}  ;
2586
        $sum_comment += $rhh_count->{$language}{'comment'};
2587
        $sum_code    += $rhh_count->{$language}{'code'}   ;
2588
        $max_len      = length($language) if length($language) > $max_len;
2589
    }
2590
    my $column_1_offset = 0;
2591
       $column_1_offset = $max_len - 17 if $max_len > 17;
2592
    my $sum_lines = $sum_blank + $sum_comment + $sum_code;
2593
    $elapsed_sec = 0.5 unless $elapsed_sec;
2594
 
2595
    my $spacing_0 = 23;
2596
    my $spacing_1 = 13;
2597
    my $spacing_2 =  9;
2598
    my $spacing_3 = 17;
2599
    if (!$opt_3) {
2600
        $spacing_1 = 19;
2601
        $spacing_2 = 14;
2602
        $spacing_3 = 28;
2603
    }
2604
    $spacing_0 += $column_1_offset;
2605
    $spacing_1 += $column_1_offset;
2606
    $spacing_3 += $column_1_offset;
2607
    my %Format = (
2608
        '1' => { 'xml' => 'name="%s" ',
2609
                 'txt' => "\%-${spacing_0}s ",
2610
               },
2611
        '2' => { 'xml' => 'name="%s" ',
2612
                 'txt' => "\%-${spacing_3}s ",
2613
               },
2614
        '3' => { 'xml' => 'files_count="%d" ',
2615
                 'txt' => '%5d ',
2616
               },
2617
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
2618
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
2619
               },
2620
        '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
2621
                 'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
2622
               },
2623
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
2624
                 'txt' => ' x %6.2f = %14.2f',
2625
               },
2626
    );
2627
    my $Style = "txt";
2628
       $Style = "xml" if $opt_xml ;
2629
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
2630
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
2631
 
2632
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
2633
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) 
2634
            if (!$opt_3) and (68 + $column_1_offset) > 79;
2635
    my $data_line  = "";
2636
    my $first_column;
2637
    my $BY_LANGUAGE = 0;
2638
    my $BY_FILE     = 0;
2639
    if      ($report_type eq "by language") {
2640
        $first_column = "Language";
2641
        $BY_LANGUAGE  = 1;
2642
    } elsif ($report_type eq "by file")     {
2643
        $first_column = "File";
2644
        $BY_FILE      = 1;
2645
    } elsif ($report_type eq "by report file")     {
2646
        $first_column = "File";
2647
    } else {
2648
        $first_column = "Report File";
2649
    }
2650
 
2651
    my $header_line  = sprintf "%s v %s", $URL, $version;
2652
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
2653
                        $elapsed_sec           ,
2654
                        $sum_files/$elapsed_sec,
2655
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports;
2656
    if ($opt_xml or $opt_yaml) {
2657
        if (!$ALREADY_SHOWED_HEADER) {
2658
            push @results, xml_or_yaml_header($URL, $version, $elapsed_sec, 
2659
                                              $sum_files, $sum_lines, $BY_FILE);
2660
            $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
2661
            # --sum-reports yields two xml or yaml files, one by
2662
            # language and one by report file, each of which needs a header
2663
        }
2664
        if ($opt_xml) {
2665
            if ($BY_FILE or ($report_type eq "by report file")) {
2666
                push @results, "<files>";
2667
            } else {
2668
                push @results, "<languages>";
2669
            }
2670
        }
2671
    } else {
2672
        push @results, output_header($header_line, $hyphen_line, $BY_FILE);
2673
    }
2674
 
2675
    if ($Style eq "txt") {
2676
        # column headers
2677
        if (!$opt_3 and $BY_FILE) {
2678
            my $spacing_n = $spacing_1 - 11;
2679
            $data_line  = sprintf "%-${spacing_n}s ", $first_column;
2680
        } else {
2681
            $data_line  = sprintf "%-${spacing_1}s ", $first_column;
2682
        }
2683
        if ($BY_FILE) {
2684
            $data_line .= sprintf "%${spacing_2}s "  , " "    ;
2685
        } else {
2686
            $data_line .= sprintf "%${spacing_2}s "  , "files";
2687
        }
2688
        my $PCT_symbol = "";
2689
           $PCT_symbol = " \%" if $opt_by_percent;
2690
        $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
2691
            "blank${PCT_symbol}"   ,
2692
            "comment${PCT_symbol}" ,
2693
            "code";
2694
        $data_line .= sprintf " %8s   %14s",
2695
            "scale"         ,
2696
            "3rd gen. equiv"
2697
              if $opt_3;
2698
        push @results, $data_line;
2699
        push @results, $hyphen_line;
2700
    }
2701
    if ($opt_csv) {
2702
        my $header2;
2703
        if ($BY_FILE) {
2704
            $header2 = "language${DELIM}filename";
2705
        } else {
2706
            $header2 = "files${DELIM}language";
2707
        }
2708
        $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
2709
        $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
2710
        $header2 .= ${DELIM} . '"' . $header_line . '"';
2711
        push @results, $header2;
2712
    }
2713
 
2714
    my $sum_scaled = 0;
2715
####foreach my $lang_or_file (keys %{$rhh_count}) {
2716
####    $rhh_count->{$lang_or_file}{'code'} = 0 unless 
2717
####        defined $rhh_count->{$lang_or_file}{'code'};
2718
####}
2719
    foreach my $lang_or_file (sort {
2720
                                 $rhh_count->{$b}{'code'} <=>
2721
                                 $rhh_count->{$a}{'code'}
2722
                               }
2723
                          keys %{$rhh_count}) {
2724
        next if $lang_or_file eq "by report file";
2725
        my ($factor, $scaled);
2726
        if ($BY_LANGUAGE or $BY_FILE) {
2727
            $factor = 1;
2728
            if ($BY_LANGUAGE) {
2729
                if (defined $rh_scale->{$lang_or_file}) {
2730
                    $factor = $rh_scale->{$lang_or_file};
2731
                } else {
2732
                    warn "No scale factor for $lang_or_file; using 1.00";
2733
                }
2734
            } else { # by individual code file
2735
                if ($report_type ne "by report file") {
2736
                    next unless defined $rhh_count->{$lang_or_file}{'lang'};
2737
                    next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
2738
                    $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
2739
                }
2740
            }
2741
            $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
2742
        } else {
2743
            if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
2744
                $opt_3 = 0;
2745
                # If we're summing together files previously generated
2746
                # with --no3 then rhh_count->{$lang_or_file}{'scaled'}
2747
                # this variable will be undefined.  That should only
2748
                # happen when summing together by file however.
2749
            } elsif ($BY_LANGUAGE) {
2750
                warn "Missing scaled language info for $lang_or_file\n";
2751
            }
2752
            if ($opt_3) {
2753
                $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
2754
                $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
2755
            }
2756
        }
2757
 
2758
        if ($BY_FILE) {
2759
            my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2760
               $clean_filename = xml_metachars($clean_filename) if $opt_xml;
2761
            $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
2762
        } else {
2763
            $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
2764
        }
2765
        $data_line .= sprintf $Format{3}{$Style}  , 
2766
                        $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
2767
        if ($opt_by_percent) {
2768
          my $DEN = compute_denominator($opt_by_percent       ,
2769
                        $rhh_count->{$lang_or_file}{'code'}   ,
2770
                        $rhh_count->{$lang_or_file}{'comment'},
2771
                        $rhh_count->{$lang_or_file}{'blank'}  );
2772
          $data_line .= sprintf $Format{5}{$Style}  , 
2773
              $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
2774
              $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
2775
              $rhh_count->{$lang_or_file}{'code'}   ;
2776
        } else {
2777
          $data_line .= sprintf $Format{4}{$Style}  , 
2778
              $rhh_count->{$lang_or_file}{'blank'}  ,
2779
              $rhh_count->{$lang_or_file}{'comment'},
2780
              $rhh_count->{$lang_or_file}{'code'}   ;
2781
        }
2782
        $data_line .= sprintf $Format{6}{$Style}  ,
2783
            $factor                               ,
2784
            $scaled if $opt_3;
2785
        $sum_scaled  += $scaled if $opt_3;
2786
 
2787
        if ($opt_xml) {
2788
            if (defined $rhh_count->{$lang_or_file}{'lang'}) {
2789
                my $lang = $rhh_count->{$lang_or_file}{'lang'};
2790
                if (!defined $languages->{$lang}) {
2791
                    $languages->{$lang} = $lang;
2792
                }
2793
                $data_line.=' language="' . $lang . '" ';
2794
            }
2795
            if ($BY_FILE or ($report_type eq "by report file")) {
2796
                push @results, "  <file " . $data_line . "/>";
2797
            } else {
2798
                push @results, "  <language " . $data_line . "/>";
2799
            }
2800
        } elsif ($opt_yaml) {
2801
            push @results,$lang_or_file . ":";
2802
            push @results,"  nFiles: "  .$rhh_count->{$lang_or_file}{'nFiles'} 
2803
                unless $BY_FILE;
2804
            if ($opt_by_percent) {
2805
              my $DEN = compute_denominator($opt_by_percent       ,
2806
                            $rhh_count->{$lang_or_file}{'code'}   ,
2807
                            $rhh_count->{$lang_or_file}{'comment'},
2808
                            $rhh_count->{$lang_or_file}{'blank'}  );
2809
              push @results,"  blank: "   . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100);
2810
              push @results,"  comment: " . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100);
2811
              push @results,"  code: "    . $rhh_count->{$lang_or_file}{'code'}   ;
2812
            } else {
2813
              push @results,"  blank: "   . $rhh_count->{$lang_or_file}{'blank'}  ;
2814
              push @results,"  comment: " . $rhh_count->{$lang_or_file}{'comment'};
2815
              push @results,"  code: "    . $rhh_count->{$lang_or_file}{'code'}   ;
2816
            }
2817
            push @results,"  language: ".$rhh_count->{$lang_or_file}{'lang'} 
2818
                if $BY_FILE;
2819
            if ($opt_3) {
2820
                push @results, "  scaled: " . $scaled;
2821
                push @results, "  factor: " . $factor;
2822
            }
2823
        } elsif ($opt_csv) {
2824
            my $extra_3 = "";
2825
               $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
2826
            my $first_column = undef;
2827
            my $clean_name   = $lang_or_file;
2828
            if ($BY_FILE) { 
2829
                $first_column = $rhh_count->{$lang_or_file}{'lang'};
2830
                $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2831
            } else {
2832
                $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
2833
            }
2834
            my $str = $first_column                         . ${DELIM} .
2835
                      $clean_name                           . ${DELIM};
2836
            if ($opt_by_percent) { 
2837
              my $DEN = compute_denominator($opt_by_percent               ,
2838
                            $rhh_count->{$lang_or_file}{'code'}{'code'}   ,
2839
                            $rhh_count->{$lang_or_file}{'code'}{'comment'},
2840
                            $rhh_count->{$lang_or_file}{'code'}{'blank'}  );
2841
              $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
2842
                      sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
2843
                      $rhh_count->{$lang_or_file}{'code'};
2844
            } else {
2845
              $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
2846
                      $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
2847
                      $rhh_count->{$lang_or_file}{'code'};
2848
            }
2849
            $str .= $extra_3;
2850
            push @results, $str;
2851
        } else {
2852
            push @results, $data_line;
2853
        }
2854
    }
2855
 
2856
    my $avg_scale = 1;  # weighted average of scale factors
2857
       $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code) 
2858
            if $sum_code and $opt_3;
2859
 
2860
    if ($opt_xml) {
2861
        $data_line = "";
2862
        if (!$BY_FILE) {
2863
            $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
2864
        }
2865
        if ($opt_by_percent) {
2866
          my $DEN = compute_denominator($opt_by_percent    ,
2867
                        $sum_code, $sum_comment, $sum_blank);
2868
          $data_line .= sprintf $Format{'5'}{$Style},
2869
              $sum_blank   / $DEN * 100,
2870
              $sum_comment / $DEN * 100,
2871
              $sum_code    ;
2872
        } else {
2873
          $data_line .= sprintf $Format{'4'}{$Style},
2874
              $sum_blank   ,
2875
              $sum_comment ,
2876
              $sum_code    ;
2877
        }
2878
        $data_line .= sprintf $Format{'6'}{$Style},
2879
            $avg_scale   ,
2880
            $sum_scaled  if $opt_3;
2881
        push @results, "  <total " . $data_line . "/>";
2882
 
2883
        if ($BY_FILE or ($report_type eq "by report file")) {
2884
            push @results, "</files>";
2885
        } else {
2886
            foreach my $language (keys %{$languages}) {
2887
                push @results, '  <language name="' . $language . '"/>';
2888
            }
2889
            push @results, "</languages>";
2890
        }
2891
 
2892
        if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
2893
            push @results, "</results>";
2894
        } else {
2895
            $ALREADY_SHOWED_XML_SECTION = 1;
2896
        }
2897
    } elsif ($opt_yaml) {
2898
        push @results, "SUM:";
2899
        if ($opt_by_percent) {
2900
          my $DEN = compute_denominator($opt_by_percent    ,
2901
                        $sum_code, $sum_comment, $sum_blank);
2902
          push @results, "  blank: "  . sprintf("%.2f", $sum_blank   / $DEN * 100);
2903
          push @results, "  comment: ". sprintf("%.2f", $sum_comment / $DEN * 100);
2904
          push @results, "  code: "   . $sum_code   ;
2905
        } else {
2906
          push @results, "  blank: "  . $sum_blank  ;
2907
          push @results, "  comment: ". $sum_comment;
2908
          push @results, "  code: "   . $sum_code   ;
2909
        }
2910
        push @results, "  nFiles: " . $sum_files  ;
2911
        if ($opt_3) {
2912
            push @results, "  scaled: " . $sum_scaled;
2913
            push @results, "  factor: " . $avg_scale ;
2914
        }
2915
    } elsif ($opt_csv) {
2916
        # do nothing
2917
    } else {
2918
 
2919
        if ($BY_FILE) {
2920
            $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
2921
        } else {
2922
            $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
2923
            $data_line .= sprintf "%${spacing_2}d ", $sum_files;
2924
        }
2925
        if ($opt_by_percent) {
2926
          my $DEN = compute_denominator($opt_by_percent    ,
2927
                        $sum_code, $sum_comment, $sum_blank);
2928
          $data_line .= sprintf $Format{'5'}{$Style},
2929
              $sum_blank   / $DEN * 100,
2930
              $sum_comment / $DEN * 100,
2931
              $sum_code    ;
2932
        } else {
2933
          $data_line .= sprintf $Format{'4'}{$Style},
2934
              $sum_blank   ,
2935
              $sum_comment ,
2936
              $sum_code    ;
2937
        }
2938
        $data_line .= sprintf $Format{'6'}{$Style},
2939
            $avg_scale   ,
2940
            $sum_scaled if $opt_3;
2941
        push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
2942
        push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
2943
        push @results, $hyphen_line;
2944
    }
2945
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2946
    print "<- generate_report\n" if $opt_v > 2;
2947
    return @results;
2948
} # 1}}}
2949
sub print_errors {                           # {{{1
2950
    my ($rh_Error_Codes, # in
2951
        $raa_errors    , # in
2952
       ) = @_;
2953
 
2954
    print "-> print_errors\n" if $opt_v > 2;
2955
    my %error_string = reverse(%{$rh_Error_Codes});
2956
    my $nErrors      = scalar @{$raa_errors};
2957
    warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors);
2958
    for (my $i = 0; $i < $nErrors; $i++) {
2959
        warn sprintf "%s:  %s\n", 
2960
                     $error_string{ $raa_errors->[$i][0] },
2961
                     $raa_errors->[$i][1] ;
2962
    }
2963
    print "<- print_errors\n" if $opt_v > 2;
2964
 
2965
} # 1}}}
2966
sub write_lang_def {                         # {{{1
2967
    my ($file                     ,
2968
        $rh_Language_by_Extension , # in
2969
        $rh_Language_by_Script    , # in
2970
        $rh_Language_by_File      , # in
2971
        $rhaa_Filters_by_Language , # in
2972
        $rh_Not_Code_Extension    , # in
2973
        $rh_Not_Code_Filename     , # in
2974
        $rh_Scale_Factor          , # in
2975
        $rh_EOL_Continuation_re   , # in
2976
       ) = @_;
2977
 
2978
    print "-> write_lang_def($file)\n" if $opt_v > 2;
2979
    my $OUT = new IO::File $file, "w";
2980
    die "Unable to write to $file\n" unless defined $OUT;
2981
 
2982
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
2983
        next if $language eq "MATLAB/Objective C/MUMPS/Mercury" or
2984
                $language eq "PHP/Pascal"               or
2985
                $language eq "Pascal/Puppet"            or
2986
                $language eq "Lisp/OpenCL"              or
2987
                $language eq "Lisp/Julia"               or
2988
                $language eq "Perl/Prolog"              or
2989
                $language eq "D/dtrace"                 or
2990
                $language eq "IDL/Qt Project/Prolog"    or
2991
                $language eq "(unknown)";
2992
        printf $OUT "%s\n", $language;
2993
        foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
2994
            printf $OUT "    filter %s", $filter->[0];
2995
            printf $OUT " %s", $filter->[1] if defined $filter->[1];
2996
            # $filter->[0] == 'remove_between_general',
2997
            #                 'remove_between_regex', and
2998
            #                 'remove_matches_2re' have two args
2999
            printf $OUT " %s", $filter->[2] if defined $filter->[2];
3000
            print  $OUT "\n";
3001
        }
3002
        foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
3003
            if ($language eq $rh_Language_by_Extension->{$ext}) {
3004
                printf $OUT "    extension %s\n", $ext;
3005
            }
3006
        }
3007
        foreach my $filename (sort keys %{$rh_Language_by_File}) {
3008
            if ($language eq $rh_Language_by_File->{$filename}) {
3009
                printf $OUT "    filename %s\n", $filename;
3010
            }
3011
        }
3012
        foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
3013
            if ($language eq $rh_Language_by_Script->{$script_exe}) {
3014
                printf $OUT "    script_exe %s\n", $script_exe;
3015
            }
3016
        }
3017
        printf $OUT "    3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
3018
        if (defined $rh_EOL_Continuation_re->{$language}) {
3019
            printf $OUT "    end_of_line_continuation %s\n", 
3020
                $rh_EOL_Continuation_re->{$language};
3021
        }
3022
    }
3023
 
3024
    $OUT->close;
3025
    print "<- write_lang_def\n" if $opt_v > 2;
3026
} # 1}}}
3027
sub read_lang_def {                          # {{{1
3028
    my ($file                     ,
3029
        $rh_Language_by_Extension , # out
3030
        $rh_Language_by_Script    , # out
3031
        $rh_Language_by_File      , # out
3032
        $rhaa_Filters_by_Language , # out
3033
        $rh_Not_Code_Extension    , # out
3034
        $rh_Not_Code_Filename     , # out
3035
        $rh_Scale_Factor          , # out
3036
        $rh_EOL_Continuation_re   , # out
3037
        $rh_EOL_abc,
3038
       ) = @_;
3039
 
3040
 
3041
    print "-> read_lang_def($file)\n" if $opt_v > 2;
3042
    my $IN = new IO::File $file, "r";
3043
    die "Unable to read $file.\n" unless defined $IN;
3044
 
3045
    my $language = "";
3046
    while (<$IN>) {
3047
        next if /^\s*#/ or /^\s*$/;
3048
 
3049
        if (/^(\w+.*?)\s*$/) {
3050
            $language = $1;
3051
            next;
3052
        }
3053
        die "Missing computer language name, line $. of $file\n"
3054
            unless $language;
3055
 
3056
        if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
3057
                       \s+(\S+)\s+(\S+)s*$/x) {
3058
            push @{$rhaa_Filters_by_Language->{$language}}, [ 
3059
                  $1 , $3 , $4 ]
3060
 
3061
        } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
3062
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
3063
 
3064
        } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
3065
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
3066
 
3067
        } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
3068
            if (defined $rh_Language_by_Extension->{$1}) {
3069
                die "File extension collision:  $1 ",
3070
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
3071
                    "and '$language'\n" ,
3072
                    "Edit $file and remove $1 from one of these two ",
3073
                    "language definitions.\n";
3074
            }
3075
            $rh_Language_by_Extension->{$1} = $language;
3076
 
3077
        } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
3078
            $rh_Language_by_File->{$1} = $language;
3079
 
3080
        } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
3081
            $rh_Language_by_Script->{$1} = $language;
3082
 
3083
        } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
3084
            $rh_Scale_Factor->{$language} = $1;
3085
 
3086
        } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
3087
            $rh_EOL_Continuation_re->{$language} = $1;
3088
 
3089
        } else {
3090
            die "Unexpected data line $. of $file:\n$_\n";
3091
        }
3092
 
3093
    }
3094
    $IN->close;
3095
    print "<- read_lang_def\n" if $opt_v > 2;
3096
} # 1}}}
3097
sub merge_lang_def {                         # {{{1
3098
    my ($file                     ,
3099
        $rh_Language_by_Extension , # in/out
3100
        $rh_Language_by_Script    , # in/out
3101
        $rh_Language_by_File      , # in/out
3102
        $rhaa_Filters_by_Language , # in/out
3103
        $rh_Not_Code_Extension    , # in/out
3104
        $rh_Not_Code_Filename     , # in/out
3105
        $rh_Scale_Factor          , # in/out
3106
        $rh_EOL_Continuation_re   , # in/out
3107
        $rh_EOL_abc,
3108
       ) = @_;
3109
 
3110
 
3111
    print "-> merge_lang_def($file)\n" if $opt_v > 2;
3112
    my $IN = new IO::File $file, "r";
3113
    die "Unable to read $file.\n" unless defined $IN;
3114
 
3115
    my $language        = "";  
3116
    my $already_know_it = undef;
3117
    while (<$IN>) {
3118
        next if /^\s*#/ or /^\s*$/;
3119
 
3120
        if (/^(\w+.*?)\s*$/) {
3121
            $language = $1;
3122
            $already_know_it = defined $rh_Scale_Factor->{$language};
3123
            next;
3124
        }
3125
        die "Missing computer language name, line $. of $file\n"
3126
            unless $language;
3127
 
3128
        if      (/^    filter\s+(\w+)\s*$/) {
3129
            next if $already_know_it;
3130
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
3131
 
3132
        } elsif (/^    filter\s+(\w+)\s+(.*?)\s*$/) {
3133
            next if $already_know_it;
3134
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
3135
 
3136
        } elsif (/^    extension\s+(\S+)\s*$/) {
3137
            next if $already_know_it;
3138
            if (defined $rh_Language_by_Extension->{$1}) {
3139
                die "File extension collision:  $1 ",
3140
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
3141
                    "and '$language'\n" ,
3142
                    "Edit $file and remove $1 from one of these two ",
3143
                    "language definitions.\n";
3144
            }
3145
            $rh_Language_by_Extension->{$1} = $language;
3146
 
3147
        } elsif (/^    filename\s+(\S+)\s*$/) {
3148
            next if $already_know_it;
3149
            $rh_Language_by_File->{$1} = $language;
3150
 
3151
        } elsif (/^    script_exe\s+(\S+)\s*$/) {
3152
            next if $already_know_it;
3153
            $rh_Language_by_Script->{$1} = $language;
3154
 
3155
        } elsif (/^    3rd_gen_scale\s+(\S+)\s*$/) {
3156
            next if $already_know_it;
3157
            $rh_Scale_Factor->{$language} = $1;
3158
 
3159
        } elsif (/^    end_of_line_continuation\s+(\S+)\s*$/) {
3160
            next if $already_know_it;
3161
            $rh_EOL_Continuation_re->{$language} = $1;
3162
 
3163
        } else {
3164
            die "Unexpected data line $. of $file:\n$_\n";
3165
        }
3166
 
3167
    }
3168
    $IN->close;
3169
    print "<- merge_lang_def\n" if $opt_v > 2;
3170
} # 1}}}
3171
sub print_extension_info {                   # {{{1
3172
    my ($extension,) = @_;
3173
    if ($extension) {  # show information on this extension
3174
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3175
            # Language_by_Extension{f}    = 'Fortran 77' 
3176
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
3177
                if $ext =~ m{$extension}i;
3178
        }
3179
    } else {           # show information on all  extensions
3180
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3181
            # Language_by_Extension{f}    = 'Fortran 77' 
3182
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
3183
        }
3184
    }
3185
} # 1}}}
3186
sub print_language_info {                    # {{{1
3187
    my ($language,         
3188
        $prefix ,) = @_;
3189
    my %extensions = (); # the subset matched by the given $language value
3190
    if ($language) {  # show information on this language
3191
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3192
            # Language_by_Extension{f}    = 'Fortran 77' 
3193
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
3194
                if lc $Language_by_Extension{$ext} eq lc $language;
3195
#               if $Language_by_Extension{$ext} =~ m{$language}i;
3196
        }
3197
    } else {          # show information on all  languages
3198
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3199
            # Language_by_Extension{f}    = 'Fortran 77' 
3200
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
3201
        }
3202
    }
3203
 
3204
    # add exceptions (one file extension mapping to multiple languages)
3205
    if (!$language or 
3206
        $language =~ /^(Objective C|MATLAB|MUMPS|Mercury)$/i) {
3207
        push @{$extensions{'Objective C'}}, "m";
3208
        push @{$extensions{'MATLAB'}}     , "m";
3209
        push @{$extensions{'MUMPS'}}      , "m";
3210
        delete $extensions{'MATLAB/Objective C/MUMPS/Mercury'};
3211
    } elsif ($language =~ /^(Lisp|OpenCL)$/i) {
3212
        push @{$extensions{'Lisp'}}  , "cl";
3213
        push @{$extensions{'OpenCL'}}, "cl";
3214
        delete $extensions{'Lisp/OpenCL'};
3215
    } elsif ($language =~ /^(Lisp|Julia)$/i) {
3216
        push @{$extensions{'Lisp'}}  , "jl";
3217
        push @{$extensions{'Julia'}} , "jl";
3218
        delete $extensions{'Lisp/Julia'};
3219
    } elsif ($language =~ /^(Perl|Prolog)$/i) {
3220
        push @{$extensions{'Perl'}}  , "pl";
3221
        push @{$extensions{'Prolog'}}, "pl";
3222
        delete $extensions{'Perl/Prolog'};
3223
    } elsif ($language =~ /^(IDL|Qt Project|Prolog)$/i) {
3224
        push @{$extensions{'IDL'}}       , "pro";
3225
        push @{$extensions{'Qt Project'}}, "pro";
3226
        push @{$extensions{'Prolog'}}    , "pro";
3227
        delete $extensions{'IDL/Qt Project/Prolog'};
3228
    } elsif ($language =~ /^(D|dtrace)$/i) {
3229
        push @{$extensions{'D'}}       , "d";
3230
        push @{$extensions{'dtrace'}}  , "d";
3231
        delete $extensions{'D/dtrace'};
3232
    } elsif ($language =~ /^(Ant)$/i) {
3233
        push @{$extensions{'Ant'}}  , "build.xml";
3234
        delete $extensions{'Ant/XML'};
3235
    }
3236
 
3237
    if (%extensions) {
3238
        foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
3239
            if ($prefix) {
3240
                printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
3241
            } else {
3242
                printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
3243
            }
3244
        }
3245
    }
3246
} # 1}}}
3247
sub print_language_filters {                 # {{{1
3248
    my ($language,) = @_;
3249
    if (!@{$Filters_by_Language{$language}}) {
3250
        warn "Unknown language: $language\n";
3251
        warn "Use --show-lang to list all defined languages.\n";
3252
        return;
3253
    }
3254
    printf "%s\n", $language;
3255
    foreach my $filter (@{$Filters_by_Language{$language}}) {
3256
        printf "    filter %s", $filter->[0];
3257
        printf "  %s", $filter->[1] if defined $filter->[1];
3258
        printf "  %s", $filter->[2] if defined $filter->[2];
3259
        print  "\n";
3260
    }
3261
    print_language_info($language, "    extensions:");
3262
} # 1}}}
3263
sub make_file_list {                         # {{{1
3264
    my ($ra_arg_list,  # in   file and/or directory names to examine
3265
        $rh_Err     ,  # in   hash of error codes
3266
        $raa_errors ,  # out  errors encountered
3267
        $rh_ignored ,  # out  files not recognized as computer languages
3268
        ) = @_;
3269
    print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
3270
 
3271
    my ($fh, $filename);
3272
    if ($opt_categorized) {
3273
        $filename = $opt_categorized;
3274
        $fh = new IO::File $filename, "+>";  # open for read/write
3275
        die "Unable to write to $filename:  $!\n" unless defined $fh;
3276
    } elsif ($opt_sdir) {
3277
        # write to the user-defined scratch directory
3278
        $filename = $opt_sdir . '/cloc_file_list.txt';
3279
        $fh = new IO::File $filename, "+>";  # open for read/write
3280
        die "Unable to write to $filename:  $!\n" unless defined $fh;
3281
    } else {
3282
        # let File::Temp create a suitable temporary file
3283
        ($fh, $filename) = tempfile(UNLINK => 1);  # delete file on exit
3284
        print "Using temp file list [$filename]\n" if $opt_v;
3285
    }
3286
 
3287
    my @dir_list = ();
3288
    foreach my $file_or_dir (@{$ra_arg_list}) {
3289
#print "make_file_list file_or_dir=$file_or_dir\n";
3290
        my $size_in_bytes = 0;
3291
        if (!-r $file_or_dir) {
3292
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
3293
            next;
3294
        }
3295
        if (is_file($file_or_dir)) {
3296
            if (!(-s $file_or_dir)) {   # 0 sized file, named pipe, socket 
3297
                $rh_ignored->{$file_or_dir} = 'zero sized file';
3298
                next;
3299
            } elsif (-B $file_or_dir and !$opt_read_binary_files) { 
3300
                # avoid binary files unless user insists on reading them
3301
                if ($opt_unicode) {
3302
                    # only ignore if not a Unicode file w/trivial 
3303
                    # ASCII transliteration
3304
                    if (!unicode_file($file_or_dir)) {
3305
                        $rh_ignored->{$file_or_dir} = 'binary file';
3306
                        next;
3307
                    }
3308
                } else {
3309
                    $rh_ignored->{$file_or_dir} = 'binary file';
3310
                    next;
3311
                }
3312
            }
3313
            push @file_list, "$file_or_dir";
3314
        } elsif (is_dir($file_or_dir)) {
3315
            push @dir_list, $file_or_dir;
3316
        } else {
3317
            push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir];
3318
            $rh_ignored->{$file_or_dir} = 'not file, not directory';
3319
        }
3320
    }
3321
    foreach my $dir (@dir_list) {
3322
#print "make_file_list dir=$dir\n";
3323
        # populates global variable @file_list
3324
        find({wanted     => \&files            ,
3325
              preprocess => \&find_preprocessor,
3326
              follow     =>  $opt_follow_links }, $dir);  
3327
    }
3328
    $nFiles_Found = scalar @file_list;
3329
    printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
3330
    write_file($opt_found, sort @file_list) if $opt_found;
3331
 
3332
    my $nFiles_Categorized = 0;
3333
    foreach my $file (@file_list) {
3334
        printf "classifying $file\n" if $opt_v > 2;
3335
 
3336
        my $basename = basename $file;
3337
        if ($Not_Code_Filename{$basename}) {
3338
            $rh_ignored->{$file} = "listed in " . '$' .
3339
                "Not_Code_Filename{$basename}";
3340
            next;
3341
        } elsif ($basename =~ m{~$}) {
3342
            $rh_ignored->{$file} = "temporary editor file";
3343
            next;
3344
        }
3345
 
3346
        my $size_in_bytes = (stat $file)[7];
3347
        my $language      = "";
3348
        if ($All_One_Language) {
3349
            # user over-rode auto-language detection by using
3350
            # --force-lang with just a language name (no extension)
3351
            $language      = $All_One_Language;
3352
        } else {
3353
            $language      = classify_file($file      ,
3354
                                           $rh_Err    ,
3355
                                           $raa_errors,
3356
                                           $rh_ignored);
3357
        }
3358
die  "make_file_list($file) undef size" unless defined $size_in_bytes;
3359
die  "make_file_list($file) undef lang" unless defined $language;
3360
        printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
3361
        ++$nFiles_Categorized;
3362
        #printf "classified %d files\n", $nFiles_Categorized 
3363
        #    unless (!$opt_progress_rate or 
3364
        #            ($nFiles_Categorized % $opt_progress_rate));
3365
    }
3366
    printf "classified %d files\r", $nFiles_Categorized 
3367
        if !$opt_quiet and $nFiles_Categorized > 1;
3368
 
3369
    print "<- make_file_list()\n" if $opt_v > 2;
3370
 
3371
    return $fh;   # handle to the file containing the list of files to process
3372
}  # 1}}}
3373
sub remove_duplicate_files {                 # {{{1
3374
    my ($fh                   , # in 
3375
        $rh_Language          , # out
3376
        $rh_unique_source_file, # out
3377
        $rh_Err               , # in
3378
        $raa_errors           , # out  errors encountered
3379
        $rh_ignored           , # out
3380
        ) = @_;
3381
 
3382
    # Check for duplicate files by comparing file sizes.
3383
    # Where files are equally sized, compare their MD5 checksums.
3384
    print "-> remove_duplicate_files\n" if $opt_v > 2;
3385
 
3386
    my $n = 0;
3387
    my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
3388
    seek($fh, 0, 0); # rewind to beginning of the temp file
3389
    while (<$fh>) {
3390
        ++$n;
3391
        my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
3392
        chomp($file);
3393
        $rh_Language->{$file} = $language;
3394
        push @{$files_by_size{$size_in_bytes}}, $file;
3395
        if ($opt_skip_uniqueness) {
3396
            $rh_unique_source_file->{$file} = 1;
3397
        }
3398
    }
3399
    return if $opt_skip_uniqueness;
3400
    if ($opt_progress_rate and ($n > $opt_progress_rate)) {
3401
        printf "Duplicate file check %d files (%d known unique)\r", 
3402
            $n, scalar keys %files_by_size;
3403
    }
3404
    $n = 0;
3405
    foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
3406
        ++$n;
3407
        printf "Unique: %8d files                                          \r",
3408
            $n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
3409
        if (scalar @{$files_by_size{$bytes}} == 1) {
3410
            # only one file is this big; must be unique
3411
            $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
3412
            next;
3413
        } else {
3414
#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
3415
            # Files in the list @{$files_by_size{$bytes} all are
3416
            # $bytes long.  Sort the list by file basename.
3417
 
3418
          # # sorting on basename causes repeatability problems
3419
          # # if the basename is not unique (eg "includeA/x.h"
3420
          # # and "includeB/x.h".  Instead, sort on full path.
3421
          # # Ref bug #114.
3422
          # my @sorted_bn = ();
3423
          # my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
3424
          # foreach my $F (sort keys %BN) {
3425
          #     push @sorted_bn, $BN{$F};
3426
          # }
3427
 
3428
            my @sorted_bn = sort @{$files_by_size{$bytes}};
3429
 
3430
            foreach my $F (different_files(\@sorted_bn  ,
3431
                                            $rh_Err     ,
3432
                                            $raa_errors ,
3433
                                            $rh_ignored ) ) {
3434
                $rh_unique_source_file->{$F} = 1;
3435
            }
3436
        }
3437
    }
3438
    print "<- remove_duplicate_files\n" if $opt_v > 2;
3439
} # 1}}}
3440
sub find_preprocessor {                      # {{{1
3441
    # invoked by File::Find's find()   
3442
    # Reads global variable %Exclude_Dir.
3443
    # Populates global variable %Ignored.
3444
    # Reject files/directories in cwd which are in the exclude list.
3445
 
3446
    my @ok = ();
3447
    foreach my $F_or_D (@_) {  # pure file or directory name, no separators
3448
        if ($Exclude_Dir{$F_or_D}) {
3449
            $Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
3450
        } elsif (-d $F_or_D) {
3451
            if ($opt_not_match_d and $F_or_D =~ m{$opt_not_match_d}) {
3452
                $Ignored{$File::Find::name} = "--not-match-d=$opt_not_match_d";
3453
            } else {
3454
                push @ok, $F_or_D;
3455
            }
3456
 
3457
        } else {
3458
            push @ok, $F_or_D;
3459
        }
3460
    }   
3461
    return @ok;
3462
} # 1}}}
3463
sub files {                                  # {{{1
3464
    # invoked by File::Find's find()   Populates global variable @file_list.
3465
    # See also find_preprocessor() which prunes undesired directories.
3466
 
3467
    my $Dir = cwd(); # not $File::Find::dir which just gives relative path
3468
    if ($opt_match_f    ) { return unless /$opt_match_f/;     }
3469
    if ($opt_not_match_f) { return if     /$opt_not_match_f/; }
3470
    if ($opt_match_d    ) { return unless $Dir =~ m{$opt_match_d}     }
3471
 
3472
    my $nBytes = -s $_ ;
3473
    if (!$nBytes) {
3474
        $Ignored{$File::Find::name} = 'zero sized file';
3475
        printf "files(%s)  zero size\n", $File::Find::name if $opt_v > 5;
3476
    }
3477
    return unless $nBytes  ; # attempting other tests w/pipe or socket will hang
3478
    if ($nBytes > $opt_max_file_size*1024**2) {
3479
        $Ignored{$File::Find::name} = "file size of " .
3480
            $nBytes/1024**2 . " MB exceeds max file size of " .
3481
            "$opt_max_file_size MB";
3482
        printf "file(%s)  exceeds $opt_max_file_size MB\n", 
3483
            $File::Find::name if $opt_v > 5;
3484
        return;
3485
    }
3486
    my $is_dir = is_dir($_);
3487
    my $is_bin = -B     $_ ;
3488
    printf "files(%s)  size=%d is_dir=%d  -B=%d\n",
3489
        $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
3490
    $is_bin = 0 if $opt_unicode and unicode_file($_);
3491
    $is_bin = 0 if $opt_read_binary_files;
3492
    return if $is_dir or $is_bin;
3493
    ++$nFiles_Found;
3494
    printf "%8d files\r", $nFiles_Found 
3495
        unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
3496
    push @file_list, $File::Find::name;
3497
} # 1}}}
3498
sub archive_files {                          # {{{1
3499
    # invoked by File::Find's find()  Populates global variable @binary_archive
3500
    foreach my $ext (keys %Known_Binary_Archives) {
3501
        push @binary_archive, $File::Find::name 
3502
            if $File::Find::name =~ m{$ext$};
3503
    }
3504
} # 1}}}
3505
sub is_file {                                # {{{1
3506
    # portable method to test if item is a file
3507
    # (-f doesn't work in ActiveState Perl on Windows)
3508
    my $item = shift @_;
3509
 
3510
    if ($ON_WINDOWS) {
3511
        my $mode = (stat $item)[2];
3512
           $mode = 0 unless $mode;
3513
        if ($mode & 0100000) { return 1; } 
3514
        else                 { return 0; }
3515
    } else {
3516
        return (-f $item);  # works on Unix, Linux, CygWin, z/OS
3517
    }
3518
} # 1}}}
3519
sub is_dir {                                 # {{{1
3520
    # portable method to test if item is a directory
3521
    # (-d doesn't work in ActiveState Perl on Windows)
3522
    my $item = shift @_;
3523
 
3524
    if ($ON_WINDOWS) {
3525
        my $mode = (stat $item)[2];
3526
           $mode = 0 unless $mode;
3527
        if ($mode & 0040000) { return 1; } 
3528
        else                 { return 0; }
3529
    } else {
3530
        return (-d $item);  # works on Unix, Linux, CygWin, z/OS
3531
    }
3532
} # 1}}}
3533
sub is_excluded {                            # {{{1
3534
    my ($file       , # in
3535
        $excluded   , # in   hash of excluded directories
3536
       ) = @_;
3537
    my($filename, $filepath, $suffix) = fileparse($file);
3538
    foreach my $path (sort keys %{$excluded}) {
3539
        return 1 if ($filepath =~ m{^$path/}i);
3540
    }
3541
} # 1}}}
3542
sub classify_file {                          # {{{1
3543
    my ($full_file   , # in
3544
        $rh_Err      , # in   hash of error codes
3545
        $raa_errors  , # out
3546
        $rh_ignored  , # out
3547
       ) = @_;
3548
 
3549
    print "-> classify_file($full_file)\n" if $opt_v > 2;
3550
    my $language = "(unknown)";
3551
 
3552
    if (basename($full_file) eq "-" && defined $opt_stdin_name) {
3553
       $full_file = $opt_stdin_name;
3554
    }
3555
 
3556
    my $look_at_first_line = 0;
3557
    my $file = basename $full_file; 
3558
    if ($opt_autoconf and $file =~ /\.in$/) {
3559
       $file =~ s/\.in$//;
3560
    }
3561
    return $language if $Not_Code_Filename{$file}; # (unknown)
3562
    return $language if $file =~ m{~$}; # a temp edit file (unknown)
3563
    if (defined $Language_by_File{$file}) {
3564
        if      ($Language_by_File{$file} eq "Ant/XML") {
3565
            return Ant_or_XML(  $full_file, $rh_Err, $raa_errors);
3566
        } elsif ($Language_by_File{$file} eq "Maven/XML") {
3567
            return Maven_or_XML($full_file, $rh_Err, $raa_errors);
3568
        } else {
3569
            return $Language_by_File{$file};
3570
        }
3571
    }
3572
 
3573
    if ($file =~ /\.([^\.]+)$/) { # has an extension
3574
      print "$full_file extension=[$1]\n" if $opt_v > 2;
3575
      my $extension = $1;
3576
         # Windows file names are case insensitive so map 
3577
         # all extensions to lowercase there.
3578
         $extension = lc $extension if $ON_WINDOWS;  
3579
      my @extension_list = ( $extension );
3580
      if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
3581
          my $extension = $1;
3582
          $extension = lc $extension if $ON_WINDOWS;  
3583
          unshift @extension_list, $extension;  # examine double ext first
3584
      }
3585
      foreach my $extension (@extension_list) {
3586
        if ($Not_Code_Extension{$extension} and 
3587
           !$Forced_Extension{$extension}) {
3588
           # If .1 (for example) is an extension that would ordinarily be
3589
           # ignored but the user has insisted this be counted with the
3590
           # --force-lang option, then go ahead and count it.
3591
            $rh_ignored->{$full_file} = 
3592
                'listed in $Not_Code_Extension{' . $extension . '}';
3593
            return $language;
3594
        }
3595
        if (defined $Language_by_Extension{$extension}) {
3596
            if ($Language_by_Extension{$extension} eq
3597
                'MATLAB/Objective C/MUMPS/Mercury') {
3598
                my $lang_M_or_O = "";
3599
                matlab_or_objective_C($full_file , 
3600
                                      $rh_Err    ,
3601
                                      $raa_errors,
3602
                                     \$lang_M_or_O);
3603
                if ($lang_M_or_O) {
3604
                    return $lang_M_or_O;
3605
                } else { # an error happened in matlab_or_objective_C()
3606
                    $rh_ignored->{$full_file} = 
3607
                        'failure in matlab_or_objective_C()';
3608
                    return $language; # (unknown)
3609
                }
3610
            } elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal') {
3611
                if (really_is_php($full_file)) {
3612
                    return 'PHP';
3613
                } elsif (really_is_incpascal($full_file)) {
3614
                    return 'Pascal';
3615
                } else {
3616
                    return $language; # (unknown)
3617
                }
3618
            } elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
3619
                my $lang_Pasc_or_Pup = "";
3620
                pascal_or_puppet(     $full_file , 
3621
                                      $rh_Err    ,
3622
                                      $raa_errors,
3623
                                     \$lang_Pasc_or_Pup);
3624
                if ($lang_Pasc_or_Pup) {
3625
                    return $lang_Pasc_or_Pup;
3626
                } else { # an error happened in pascal_or_puppet()
3627
                    $rh_ignored->{$full_file} = 
3628
                        'failure in pascal_or_puppet()';
3629
                    return $language; # (unknown)
3630
                }
3631
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
3632
                return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
3633
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
3634
                return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
3635
            } elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
3636
                return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
3637
            } elsif ($Language_by_Extension{$extension} eq 
3638
                     'IDL/Qt Project/Prolog') {
3639
                return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
3640
            } elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
3641
                # is it D or an init.d shell script?
3642
                my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
3643
                if ($a_script) {
3644
                    # could be dtrace, sh, bash or anything one would
3645
                    # write an init.d script in
3646
                    if (defined $Language_by_Script{$a_script}) {
3647
                        return $Language_by_Script{$a_script};
3648
                    } else {
3649
                        $rh_ignored->{$full_file} = 
3650
                            "Unrecognized script language, '$a_script'";
3651
                    }
3652
                } else {
3653
                    return 'D';
3654
                }
3655
            } elsif ($Language_by_Extension{$extension} eq 'Smarty') {
3656
                # Smarty extension .tpl is generic; make sure the
3657
                # file at least roughly resembles PHP.  Alternatively,
3658
                # if the user forces the issue, do the count.
3659
                my $force_smarty = 0;
3660
                foreach (@opt_force_lang) {
3661
                    if (lc($_) eq "smarty,tpl") {
3662
                        $force_smarty = 1; 
3663
                        last;
3664
                    }
3665
                }
3666
                if (really_is_php($full_file) or $force_smarty) {
3667
                    return 'Smarty';
3668
                } else {
3669
                    return $language; # (unknown)
3670
                }
3671
            } else {
3672
                return $Language_by_Extension{$extension};
3673
            }
3674
        } else { # has an unmapped file extension
3675
            $look_at_first_line = 1;
3676
        }
3677
      }
3678
    } elsif (defined $Language_by_File{lc $file}) {
3679
        return $Language_by_File{lc $file};
3680
    } elsif ($opt_lang_no_ext and 
3681
             defined $Filters_by_Language{$opt_lang_no_ext}) {
3682
        return $opt_lang_no_ext;
3683
    } else {  # no file extension
3684
        $look_at_first_line = 1;
3685
    }
3686
 
3687
    if ($look_at_first_line) {
3688
        # maybe it is a shell/Perl/Python/Ruby/etc script that
3689
        # starts with pound bang:
3690
        #   #!/usr/bin/perl
3691
        #   #!/usr/bin/env perl
3692
        my $script_language = peek_at_first_line($full_file , 
3693
                                                 $rh_Err    , 
3694
                                                 $raa_errors);
3695
        if (!$script_language) {
3696
            $rh_ignored->{$full_file} = "language unknown (#2)";
3697
            # returns (unknown)
3698
        }
3699
        if (defined $Language_by_Script{$script_language}) {
3700
            if (defined $Filters_by_Language{
3701
                            $Language_by_Script{$script_language}}) {
3702
                $language = $Language_by_Script{$script_language};
3703
            } else {
3704
                $rh_ignored->{$full_file} = 
3705
                    "undefined:  Filters_by_Language{" . 
3706
                    $Language_by_Script{$script_language} .
3707
                    "} for scripting language $script_language";
3708
                # returns (unknown)
3709
            }
3710
        } else {
3711
            $rh_ignored->{$full_file} = "language unknown (#3)";
3712
            # returns (unknown)
3713
        }
3714
    }
3715
    print "<- classify_file($full_file)\n" if $opt_v > 2;
3716
    return $language;
3717
} # 1}}}
3718
sub peek_at_first_line {                     # {{{1
3719
    my ($file        , # in
3720
        $rh_Err      , # in   hash of error codes
3721
        $raa_errors  , # out
3722
       ) = @_;
3723
 
3724
    print "-> peek_at_first_line($file)\n" if $opt_v > 2;
3725
 
3726
    my $script_language = "";
3727
    if (!-r $file) {
3728
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
3729
        return $script_language;
3730
    }
3731
    my $IN = new IO::File $file, "r";
3732
    if (!defined $IN) {
3733
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
3734
        print "<- peek_at_first_line($file)\n" if $opt_v > 2;
3735
        return $script_language;
3736
    }
3737
    chomp(my $first_line = <$IN>);
3738
    if (defined $first_line) {
3739
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
3740
        if ($first_line =~ /^#\!\s*(\S.*?)$/) {
3741
#print "peek_at_first_line 1=[$1]\n";
3742
            my @pound_bang = split(' ', $1);
3743
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
3744
            if (basename($pound_bang[0]) eq "env" and 
3745
                scalar @pound_bang > 1) {
3746
                $script_language = $pound_bang[1];
3747
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
3748
            } else {
3749
                $script_language = basename $pound_bang[0];
3750
#print "peek_at_first_line pound_bang B $script_language\n";
3751
            }
3752
        }
3753
    }
3754
    $IN->close;
3755
    print "<- peek_at_first_line($file)\n" if $opt_v > 2;
3756
    return $script_language;
3757
} # 1}}}
3758
sub different_files {                        # {{{1
3759
    # See which of the given files are unique by computing each file's MD5
3760
    # sum.  Return the subset of files which are unique.
3761
    my ($ra_files    , # in
3762
        $rh_Err      , # in
3763
        $raa_errors  , # out
3764
        $rh_ignored  , # out
3765
       ) = @_;
3766
 
3767
    print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
3768
    my %file_hash = ();  # file_hash{md5 hash} = [ file1, file2, ... ]
3769
    foreach my $F (@{$ra_files}) {
3770
        next if is_dir($F);  # needed for Windows
3771
        my $IN = new IO::File $F, "r";
3772
        if (!defined $IN) {
3773
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
3774
            $rh_ignored->{$F} = 'cannot read';
3775
        } else {
3776
            if ($HAVE_Digest_MD5) {
3777
                binmode $IN;
3778
                my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
3779
#print "$F, $MD5\n";
3780
                push @{$file_hash{$MD5}}, $F;
3781
            } else {
3782
                # all files treated unique
3783
                push @{$file_hash{$F}}, $F;
3784
            }
3785
            $IN->close;
3786
        }
3787
    }
3788
 
3789
    # Loop over file sets having identical MD5 sums.  Within
3790
    # each set, pick the file that most resembles known source 
3791
    # code.
3792
    my @unique = ();
3793
    for my $md5 (sort keys %file_hash) {
3794
        my $i_best = 0;
3795
        for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
3796
            my $F = $file_hash{$md5}[$i];
3797
            my (@nul_a, %nul_h);
3798
            my $language = classify_file($F, $rh_Err, 
3799
                                        # don't save these errors; pointless
3800
                                        \@nul_a, \%nul_h);
3801
            $i_best = $i if $language ne "(unknown)";
3802
        }
3803
        # keep the best one found and identify the rest as ignored
3804
        for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
3805
            if ($i == $i_best) {
3806
                push @unique, $file_hash{$md5}[$i_best];
3807
            } else {
3808
                $rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
3809
                    $file_hash{$md5}[$i_best];
3810
            }
3811
        }
3812
 
3813
    }
3814
    print "<- different_files(@unique)\n" if $opt_v > 2;
3815
    return @unique;
3816
} # 1}}}
3817
sub call_counter {                           # {{{1
3818
    my ($file     , # in
3819
        $language , # in
3820
        $ra_Errors, # out
3821
       ) = @_;
3822
 
3823
    # Logic:  pass the file through the following filters:
3824
    #         1. remove blank lines
3825
    #         2. remove comments using each filter defined for this language
3826
    #            (example:  SQL has two, remove_starts_with(--) and 
3827
    #             remove_c_comments() )
3828
    #         3. compute comment lines as 
3829
    #               total lines - blank lines - lines left over after all
3830
    #                   comment filters have been applied
3831
 
3832
    print "-> call_counter($file, $language)\n" if $opt_v > 2;
3833
#print "call_counter:  ", Dumper(@routines), "\n";
3834
 
3835
    my @lines = ();
3836
    my $ascii = "";
3837
    if (-B $file and $opt_unicode) {
3838
        # was binary so must be unicode
3839
 
3840
        $/ = undef;
3841
        my $IN = new IO::File $file, "r";
3842
        my $bin_text = <$IN>;
3843
        $IN->close;
3844
        $/ = "\n";
3845
 
3846
        $ascii = unicode_to_ascii( $bin_text );
3847
        @lines = split("\n", $ascii );
3848
        foreach (@lines) { $_ = "$_\n"; }
3849
 
3850
    } else {
3851
        # regular text file
3852
        @lines = read_file($file);
3853
        $ascii = join('', @lines);
3854
    }
3855
 
3856
    my @original_lines = @lines;
3857
    my $total_lines    = scalar @lines;
3858
 
3859
    print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
3860
    @lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
3861
    my $blank_lines = $total_lines - scalar @lines;
3862
    print_lines($file, "Blank lines removed:", \@lines) 
3863
        if $opt_print_filter_stages;
3864
 
3865
    @lines = rm_comments(\@lines, $language, $file,
3866
                               \%EOL_Continuation_re);
3867
 
3868
    my $comment_lines = $total_lines - $blank_lines - scalar  @lines;
3869
    if ($opt_strip_comments) {
3870
        my $stripped_file = "";
3871
        if ($opt_original_dir) {
3872
            $stripped_file =          $file . ".$opt_strip_comments";
3873
        } else {
3874
            $stripped_file = basename $file . ".$opt_strip_comments";
3875
        }
3876
        write_file($stripped_file, @lines);
3877
    }
3878
    if ($opt_html and !$opt_diff) {
3879
        chomp(@original_lines);  # includes blank lines, comments
3880
        chomp(@lines);           # no blank lines, no comments
3881
 
3882
        my (@diff_L, @diff_R, %count);
3883
 
3884
        # remove blank lines to get better quality diffs; count
3885
        # blank lines separately
3886
        my @original_lines_minus_white = ();
3887
        # however must keep track of how many blank lines were removed and
3888
        # where they were removed so that the HTML display can include it
3889
        my %blank_line  = ();
3890
        my $insert_line = 0;
3891
        foreach (@original_lines) {
3892
            if (/^\s*$/) {
3893
               ++$count{blank}{same};
3894
               ++$blank_line{ $insert_line };
3895
            } else {
3896
                ++$insert_line;
3897
                push @original_lines_minus_white, $_;
3898
            }
3899
        }
3900
 
3901
        array_diff( $file                       ,   # in
3902
                   \@original_lines_minus_white ,   # in
3903
                   \@lines                      ,   # in
3904
                   "comment"                    ,   # in
3905
                   \@diff_L, \@diff_R,          ,   # out
3906
                    $ra_Errors);                    # in/out
3907
        write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
3908
#print Dumper("count", \%count);
3909
    }
3910
 
3911
    print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n" 
3912
        if $opt_v > 2;
3913
    return ($total_lines, $blank_lines, $comment_lines);
3914
} # 1}}}
3915
sub windows_glob {                           # {{{1
3916
    # Windows doesn't expand wildcards.  Use code from Sean M. Burke's 
3917
    # Win32::Autoglob module to do this.
3918
    return map {;
3919
        ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
3920
          } @_; 
3921
} # 1}}}
3922
sub write_file {                             # {{{1
3923
    my ($file  , # in
3924
        @lines , # in
3925
       ) = @_;
3926
 
3927
#print "write_file 1 [$file]\n";
3928
    # Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
3929
    my $preglob_filename = $file;
3930
#print "write_file 2 [$preglob_filename]\n";
3931
    if ($ON_WINDOWS) {
3932
        $file = (windows_glob($file))[0];
3933
    } else {
3934
        $file = File::Glob::glob($file);
3935
    }
3936
#print "write_file 3 [$file]\n";
3937
    $file = $preglob_filename unless $file;
3938
#print "write_file 4 [$file]\n";
3939
 
3940
    print "-> write_file($file)\n" if $opt_v > 2;
3941
 
3942
    # Create the destination directory if it doesn't already exist.
3943
    my $abs_file_path = File::Spec->rel2abs( $file );
3944
    my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
3945
    mkpath($volume . $directories, 1, 0777);
3946
 
3947
    my $OUT = new IO::File $file, "w";
3948
    if (defined $OUT) {
3949
        chomp(@lines);
3950
        print $OUT join("\n", @lines), "\n";
3951
        $OUT->close;
3952
    } else {
3953
        warn "Unable to write to $file\n";
3954
    }
3955
    print "Wrote $file" unless $opt_quiet;
3956
    print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
3957
    print "\n" unless $opt_quiet;
3958
 
3959
    print "<- write_file\n" if $opt_v > 2;
3960
} # 1}}}
3961
sub read_file  {                             # {{{1
3962
    my ($file, ) = @_;
3963
    my %BoM = (
3964
        "fe ff"           => 2 ,
3965
        "ff fe"           => 2 ,
3966
        "ef bb bf"        => 3 ,
3967
        "f7 64 4c"        => 3 ,
3968
        "0e fe ff"        => 3 ,
3969
        "fb ee 28"        => 3 ,
3970
        "00 00 fe ff"     => 4 ,
3971
        "ff fe 00 00"     => 4 ,
3972
        "2b 2f 76 38"     => 4 ,
3973
        "2b 2f 76 39"     => 4 ,
3974
        "2b 2f 76 2b"     => 4 ,
3975
        "2b 2f 76 2f"     => 4 ,
3976
        "dd 73 66 73"     => 4 ,
3977
        "84 31 95 33"     => 4 ,
3978
        "2b 2f 76 38 2d"  => 5 ,
3979
        );
3980
 
3981
    print "-> read_file($file)\n" if $opt_v > 2;
3982
    my @lines = ();
3983
    my $IN = new IO::File $file, "r";
3984
    if (defined $IN) {
3985
        @lines = <$IN>;
3986
        $IN->close;
3987
        if ($lines[$#lines]) {  # test necessary for zero content files
3988
                                # (superfluous?)
3989
            # Some files don't end with a new line.  Force this:
3990
            $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
3991
        }
3992
    } else {
3993
        warn "Unable to read $file\n";
3994
    }
3995
 
3996
    # Are first few characters of the file Unicode Byte Order
3997
    # Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)? 
3998
    # If yes, remove them.
3999
    if (@lines) {
4000
        my @chrs   = split('', $lines[0]);
4001
        my $n_chrs = scalar @chrs;
4002
        my ($n2, $n3, $n4, $n5) = ('', '', '', '');
4003
        $n2 = sprintf("%x %x", map  ord, @chrs[0,1]) if $n_chrs >= 2;
4004
        $n3 = sprintf("%s %x", $n2, ord  $chrs[2])   if $n_chrs >= 3;
4005
        $n4 = sprintf("%s %x", $n3, ord  $chrs[3])   if $n_chrs >= 4;
4006
        $n5 = sprintf("%s %x", $n4, ord  $chrs[4])   if $n_chrs >= 5;
4007
        if      (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
4008
        } elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
4009
        } elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
4010
        } elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
4011
        }
4012
    }
4013
 
4014
    # Trim DOS line endings.  This allows Windows files
4015
    # to be diff'ed with Unix files without line endings
4016
    # causing every line to differ.
4017
    foreach (@lines) { s/\cM$// }
4018
 
4019
    print "<- read_file\n" if $opt_v > 2;
4020
    return @lines;
4021
} # 1}}}
4022
sub rm_blanks {                              # {{{1
4023
    my ($ra_in    ,
4024
        $language ,
4025
        $rh_EOL_continuation_re) = @_;
4026
    print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
4027
#print "rm_blanks: language = [$language]\n";
4028
    my @out = ();
4029
    if ($language eq "COBOL") {
4030
        @out = remove_cobol_blanks($ra_in);
4031
    } else {
4032
        # removes blank lines
4033
        if (defined $rh_EOL_continuation_re->{$language}) {
4034
            @out = remove_matches_2re($ra_in, '^\s*$', 
4035
                                      $rh_EOL_continuation_re->{$language}); 
4036
        } else {
4037
            @out = remove_matches($ra_in, '^\s*$');
4038
        }
4039
    }
4040
 
4041
    print "<- rm_blanks(language=$language)\n" if $opt_v > 2;
4042
    return @out;
4043
} # 1}}}
4044
sub rm_comments {                            # {{{1
4045
    my ($ra_lines , # in, must be free of blank lines
4046
        $language , # in
4047
        $file     , # in (some language counters, eg Haskell, need 
4048
                    #     access to the original file)
4049
        $rh_EOL_continuation_re , # in
4050
       ) = @_;
4051
    print "-> rm_comments(file=$file)\n" if $opt_v > 2;
4052
    my @routines       = @{$Filters_by_Language{$language}};
4053
    my @lines          = @{$ra_lines};
4054
    my @original_lines = @{$ra_lines};
4055
 
4056
    if (!scalar @original_lines) {
4057
        return @lines;
4058
    }
4059
 
4060
    foreach my $call_string (@routines) {
4061
        my $subroutine = $call_string->[0];
4062
        if (! defined &{$subroutine}) {
4063
            warn "rm_comments undefined subroutine $subroutine for $file\n";
4064
            next;
4065
        }
4066
        print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
4067
        my @args  = @{$call_string};
4068
        shift @args; # drop the subroutine name
4069
        if (@args and $args[0] eq '>filename<') {
4070
            shift   @args;
4071
            unshift @args, $file;
4072
        }
4073
 
4074
        no strict 'refs';
4075
        @lines = &{$subroutine}(\@lines, @args);   # apply filter...
4076
 
4077
        print_lines($file, "After $subroutine(@args)", \@lines) 
4078
            if $opt_print_filter_stages;
4079
        # then remove blank lines which are created by comment removal
4080
        if (defined $rh_EOL_continuation_re->{$language}) {
4081
            @lines = remove_matches_2re(\@lines, '^\s*$',
4082
                                        $rh_EOL_continuation_re->{$language});
4083
        } else {
4084
            @lines = remove_matches(\@lines, '^\s*$');
4085
        }
4086
 
4087
        print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines) 
4088
            if $opt_print_filter_stages;
4089
    }
4090
    # Exception for scripting languages:  treat the first #! line as code.
4091
    # Will need to add it back in if it was removed earlier.
4092
    if (defined $Script_Language{$language} and 
4093
        $original_lines[0] =~ /^#!/ and
4094
        (scalar(@lines) == 0 or 
4095
         $lines[0] ne $original_lines[0])) {
4096
        unshift @lines, $original_lines[0];  # add the first line back
4097
    }
4098
 
4099
    foreach (@lines) { chomp }   # make sure no spurious newlines were added
4100
 
4101
    print "<- rm_comments\n" if $opt_v > 2;
4102
    return @lines;
4103
} # 1}}}
4104
sub remove_f77_comments {                    # {{{1
4105
    my ($ra_lines, ) = @_;
4106
    print "-> remove_f77_comments\n" if $opt_v > 2;
4107
 
4108
    my @save_lines = ();
4109
    foreach (@{$ra_lines}) {
4110
        next if m{^[*cC]};
4111
        next if m{^\s*!};
4112
        push @save_lines, $_;
4113
    }
4114
 
4115
    print "<- remove_f77_comments\n" if $opt_v > 2;
4116
    return @save_lines;
4117
} # 1}}}
4118
sub remove_f90_comments {                    # {{{1
4119
    # derived from SLOCCount
4120
    my ($ra_lines, ) = @_;
4121
    print "-> remove_f90_comments\n" if $opt_v > 2;
4122
 
4123
    my @save_lines = ();
4124
    foreach (@{$ra_lines}) {
4125
        # a comment is              m/^\s*!/
4126
        # an empty line is          m/^\s*$/
4127
        # a HPF statement is        m/^\s*!hpf\$/i
4128
        # an Open MP statement is   m/^\s*!omp\$/i
4129
        if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
4130
            push @save_lines, $_;
4131
        }
4132
    }
4133
 
4134
    print "<- remove_f90_comments\n" if $opt_v > 2;
4135
    return @save_lines;
4136
} # 1}}}
4137
sub remove_matches {                         # {{{1
4138
    my ($ra_lines, # in
4139
        $pattern , # in   Perl regular expression (case insensitive)
4140
       ) = @_;
4141
    print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
4142
 
4143
    my @save_lines = ();
4144
    foreach (@{$ra_lines}) {
4145
#chomp; print "remove_matches [$pattern] [$_]\n";
4146
        next if m{$pattern}i;
4147
        push @save_lines, $_;
4148
    }
4149
 
4150
    print "<- remove_matches\n" if $opt_v > 2;
4151
#print "remove_matches returning\n   ", join("\n   ", @save_lines), "\n";
4152
    return @save_lines;
4153
} # 1}}}
4154
sub remove_matches_2re {                     # {{{1
4155
    my ($ra_lines, # in
4156
        $pattern1, # in Perl regex 1 (case insensitive) to match
4157
        $pattern2, # in Perl regex 2 (case insensitive) to not match prev line
4158
       ) = @_;
4159
    print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
4160
 
4161
    my @save_lines = ();
4162
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
4163
#       chomp($ra_lines->[$i]);
4164
#print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
4165
        if ($i) {
4166
#print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
4167
            next if ($ra_lines->[$i]   =~ m{$pattern1}i) and 
4168
                    ($ra_lines->[$i-1] !~ m{$pattern2}i);
4169
        } else {
4170
            # on first line
4171
            next if $ra_lines->[$i]   =~  m{$pattern1}i;
4172
        }
4173
        push @save_lines, $ra_lines->[$i];
4174
    }
4175
 
4176
    print "<- remove_matches_2re\n" if $opt_v > 2;
4177
#print "remove_matches_2re returning\n   ", join("\n   ", @save_lines), "\n";
4178
    return @save_lines;
4179
} # 1}}}
4180
sub remove_inline {                          # {{{1
4181
    my ($ra_lines, # in
4182
        $pattern , # in   Perl regular expression (case insensitive)
4183
       ) = @_;
4184
    print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
4185
 
4186
    my @save_lines = ();
4187
    unless ($opt_inline) {
4188
        return @{$ra_lines};
4189
    }
4190
    my $nLines_affected = 0;
4191
    foreach (@{$ra_lines}) {
4192
#chomp; print "remove_inline [$pattern] [$_]\n";
4193
        if (m{$pattern}i) {
4194
            ++$nLines_affected;
4195
            s{$pattern}{}i;
4196
        }
4197
        push @save_lines, $_;
4198
    }
4199
 
4200
    print "<- remove_inline\n" if $opt_v > 2;
4201
#print "remove_inline returning\n   ", join("\n   ", @save_lines), "\n";
4202
    return @save_lines;
4203
} # 1}}}
4204
sub remove_above {                           # {{{1
4205
    my ($ra_lines, $marker, ) = @_;
4206
    print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
4207
 
4208
    # Make two passes through the code:
4209
    # 1. check if the marker exists
4210
    # 2. remove anything above the marker if it exists,
4211
    #    do nothing if the marker does not exist
4212
 
4213
    # Pass 1
4214
    my $found_marker = 0;
4215
    for (my $line_number  = 1;
4216
            $line_number <= scalar @{$ra_lines};
4217
            $line_number++) {
4218
        if ($ra_lines->[$line_number-1] =~ m{$marker}) {
4219
            $found_marker = $line_number;
4220
            last;
4221
        }
4222
    }
4223
 
4224
    # Pass 2 only if needed
4225
    my @save_lines = ();
4226
    if ($found_marker) {
4227
        my $n = 1;
4228
        foreach (@{$ra_lines}) {
4229
            push @save_lines, $_
4230
                if $n >= $found_marker;
4231
            ++$n;
4232
        }
4233
    } else { # marker wasn't found; save all lines
4234
        foreach (@{$ra_lines}) {
4235
            push @save_lines, $_;
4236
        }
4237
    }
4238
 
4239
    print "<- remove_above\n" if $opt_v > 2;
4240
    return @save_lines;
4241
} # 1}}}
4242
sub remove_below {                           # {{{1
4243
    my ($ra_lines, $marker, ) = @_;
4244
    print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
4245
 
4246
    my @save_lines = ();
4247
    foreach (@{$ra_lines}) {
4248
        last if m{$marker};
4249
        push @save_lines, $_;
4250
    }
4251
 
4252
    print "<- remove_below\n" if $opt_v > 2;
4253
    return @save_lines;
4254
} # 1}}}
4255
sub remove_below_above {                     # {{{1
4256
    my ($ra_lines, $marker_below, $marker_above, ) = @_;
4257
    # delete lines delimited by start and end line markers such
4258
    # as Perl POD documentation
4259
    print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n" 
4260
        if $opt_v > 2;
4261
 
4262
    my @save_lines = ();
4263
    my $between    = 0;
4264
    foreach (@{$ra_lines}) {
4265
        if (!$between and m{$marker_below}) {
4266
            $between    = 1;
4267
            next;
4268
        }
4269
        if ($between and m{$marker_above}) {
4270
            $between    = 0;
4271
            next;
4272
        }
4273
        next if $between;
4274
        push @save_lines, $_;
4275
    }
4276
 
4277
    print "<- remove_below_above\n" if $opt_v > 2;
4278
    return @save_lines;
4279
} # 1}}}
4280
sub remove_between {                         # {{{1
4281
    my ($ra_lines, $marker, ) = @_;
4282
    # $marker must contain one of the balanced pairs understood
4283
    # by Regexp::Common::balanced, namely
4284
    # '{}'  '()'  '[]'  or  '<>'
4285
 
4286
    print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
4287
    my %acceptable = ('{}'=>1,  '()'=>1,  '[]'=>1,  '<>'=>1, );
4288
    die "remove_between:  invalid delimiter '$marker'\n",
4289
        "the delimiter must be one of these four pairs:\n",
4290
        "{}  ()  []  <>\n" unless
4291
        $acceptable{$marker};
4292
 
4293
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
4294
 
4295
    my $all_lines = join("", @{$ra_lines});
4296
 
4297
    no strict 'vars';
4298
    # otherwise get:
4299
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
4300
    if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
4301
        no warnings; 
4302
        $all_lines =~ s/$1//g;
4303
    }
4304
 
4305
    print "<- remove_between\n" if $opt_v > 2;
4306
    return split("\n", $all_lines);
4307
} # 1}}}
4308
sub remove_between_general {                 # {{{1
4309
    my ($ra_lines, $start_marker, $end_marker, ) = @_;
4310
    # Start and end markers may be any length strings.
4311
 
4312
    print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
4313
        if $opt_v > 2;
4314
 
4315
    my $all_lines = join("", @{$ra_lines});
4316
 
4317
    my @save_lines = ();
4318
    my $in_comment = 0;
4319
    foreach (@{$ra_lines}) {
4320
 
4321
        next if /^\s*$/;
4322
        s/\Q$start_marker\E.*?\Q$end_marker\E//g;  # strip one-line comments
4323
        next if /^\s*$/;
4324
        if ($in_comment) {
4325
            if (/\Q$end_marker\E/) {
4326
                s/^.*?\Q$end_marker\E//;
4327
                $in_comment = 0;
4328
            }
4329
            next if $in_comment;
4330
        }
4331
        next if /^\s*$/;
4332
        $in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
4333
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
4334
        if ($in_comment) {
4335
            # part code, part comment; strip the comment and keep the code
4336
            s/^(.*?)\Q$start_marker\E.*$/$1/;
4337
        }
4338
        push @save_lines, $_;
4339
    }
4340
 
4341
    print "<- remove_between_general\n" if $opt_v > 2;
4342
    return @save_lines;
4343
} # 1}}}
4344
sub remove_between_regex   {                 # {{{1
4345
    my ($ra_lines, $start_RE, $end_RE, ) = @_;
4346
    # Start and end regex's may be any length strings.
4347
 
4348
    print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
4349
        if $opt_v > 2;
4350
 
4351
    my $all_lines = join("", @{$ra_lines});
4352
 
4353
    my @save_lines = ();
4354
    my $in_comment = 0;
4355
    foreach (@{$ra_lines}) {
4356
 
4357
        next if /^\s*$/;
4358
        s/${start_RE}.*?${end_RE}//g;  # strip one-line comments
4359
        next if /^\s*$/;
4360
        if ($in_comment) {
4361
            if (/$end_RE/) {
4362
                s/^.*?${end_RE}//;
4363
                $in_comment = 0;
4364
            }
4365
            next if $in_comment;
4366
        }   
4367
        next if /^\s*$/;
4368
        $in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
4369
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
4370
        if ($in_comment) {
4371
            # part code, part comment; strip the comment and keep the code
4372
            s/^(.*?)${start_RE}.*$/$1/;
4373
        }
4374
        push @save_lines, $_;
4375
    }
4376
 
4377
    print "<- remove_between_regex\n" if $opt_v > 2;
4378
    return @save_lines;
4379
} # 1}}}
4380
sub remove_cobol_blanks {                    # {{{1
4381
    # subroutines derived from SLOCCount
4382
    my ($ra_lines, ) = @_;
4383
 
4384
    my $free_format = 0;  # Support "free format" source code.
4385
    my @save_lines  = ();
4386
 
4387
    foreach (@{$ra_lines}) {
4388
        next if m/^\s*$/;
4389
        my $line = expand($_);  # convert tabs to equivalent spaces
4390
        $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
4391
        if ($free_format) {
4392
            push @save_lines, $_;
4393
        } else {
4394
            # Greg Toth:
4395
            #  (1) Treat lines with any alphanum in cols 1-6 and 
4396
            #      blanks in cols 7 through 71 as blank line, and
4397
            #  (2) Treat lines with any alphanum in cols 1-6 and 
4398
            #      slash (/) in col 7 as blank line (this is a 
4399
            #      page eject directive). 
4400
            push @save_lines, $_ unless m/^\d{6}\s*$/             or 
4401
                                        ($line =~ m/^.{6}\s{66}/) or 
4402
                                        ($line =~ m/^......\//);
4403
        }
4404
    }
4405
    return @save_lines;
4406
} # 1}}}
4407
sub remove_cobol_comments {                  # {{{1
4408
    # subroutines derived from SLOCCount
4409
    my ($ra_lines, ) = @_;
4410
 
4411
    my $free_format = 0;  # Support "free format" source code.
4412
    my @save_lines  = ();
4413
 
4414
    foreach (@{$ra_lines}) {
4415
        if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
4416
        if ($free_format) {
4417
            push @save_lines, $_ unless m{^\s*\*};
4418
        } else {
4419
            push @save_lines, $_ unless m{^......\*} or m{^\*};
4420
        }
4421
    }
4422
    return @save_lines;
4423
} # 1}}}
4424
sub remove_jcl_comments {                    # {{{1
4425
    my ($ra_lines, ) = @_;
4426
 
4427
    print "-> remove_jcl_comments\n" if $opt_v > 2;
4428
 
4429
    my @save_lines = ();
4430
    my $in_comment = 0;
4431
    foreach (@{$ra_lines}) {
4432
        next if /^\s*$/;
4433
        next if m{^\s*//\*};
4434
        last if m{^\s*//\s*$};
4435
        push @save_lines, $_;
4436
    }
4437
 
4438
    print "<- remove_jcl_comments\n" if $opt_v > 2;
4439
    return @save_lines;
4440
} # 1}}}
4441
sub remove_jsp_comments {                    # {{{1
4442
    #  JSP comment is   <%--  body of comment   --%>
4443
    my ($ra_lines, ) = @_;
4444
 
4445
    print "-> remove_jsp_comments\n" if $opt_v > 2;
4446
 
4447
    my @save_lines = ();
4448
    my $in_comment = 0;
4449
    foreach (@{$ra_lines}) {
4450
 
4451
        next if /^\s*$/;
4452
        s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
4453
        next if /^\s*$/;
4454
        if ($in_comment) {
4455
            if (/\-\-\%>/) {
4456
                s/^.*?\-\-\%>//;
4457
                $in_comment = 0;
4458
            }
4459
        }
4460
        next if /^\s*$/;
4461
        $in_comment = 1 if /^(.*?)<\%\-\-/;
4462
        next if defined $1 and $1 =~ /^\s*$/;
4463
        next if ($in_comment);
4464
        push @save_lines, $_;
4465
    }
4466
 
4467
    print "<- remove_jsp_comments\n" if $opt_v > 2;
4468
    return @save_lines;
4469
} # 1}}}
4470
sub remove_html_comments {                   # {{{1
4471
    #  HTML comment is   <!--  body of comment   -->
4472
    #  Need to use my own routine until the HTML comment regex in
4473
    #  the Regexp::Common module can handle  <!--  --  -->
4474
    my ($ra_lines, ) = @_;
4475
 
4476
    print "-> remove_html_comments\n" if $opt_v > 2;
4477
 
4478
    my @save_lines = ();
4479
    my $in_comment = 0;
4480
    foreach (@{$ra_lines}) {
4481
 
4482
        next if /^\s*$/;
4483
        s/<!\-\-.*?\-\->//g;  # strip one-line comments
4484
        next if /^\s*$/;
4485
        if ($in_comment) {
4486
            if (/\-\->/) {
4487
                s/^.*?\-\->//;
4488
                $in_comment = 0;
4489
            }
4490
        }
4491
        next if /^\s*$/;
4492
        $in_comment = 1 if /^(.*?)<!\-\-/;
4493
        next if defined $1 and $1 =~ /^\s*$/;
4494
        next if ($in_comment);
4495
        push @save_lines, $_;
4496
    }
4497
 
4498
    print "<- remove_html_comments\n" if $opt_v > 2;
4499
    return @save_lines;
4500
} # 1}}}
4501
sub remove_haml_block {                      # {{{1
4502
    # Haml block comments are defined by a silent comment marker like
4503
    #    /
4504
    # or
4505
    #    -#
4506
    # followed by indented text on subsequent lines.
4507
    # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
4508
    my ($ra_lines, ) = @_;
4509
 
4510
    print "-> remove_haml_block\n" if $opt_v > 2;
4511
 
4512
    my @save_lines = ();
4513
    my $in_comment = 0;
4514
    foreach (@{$ra_lines}) {
4515
 
4516
        next if /^\s*$/;
4517
        my $line = expand($_);  # convert tabs to equivalent spaces
4518
        if ($in_comment) {
4519
            $line =~ /^(\s*)/;
4520
            # print "indent=", length $1, "\n";
4521
            if (length $1 < $in_comment) {
4522
                # indent level is less than comment level
4523
                # are back in code
4524
                $in_comment = 0;
4525
            } else {
4526
                # still in comments, don't use this line
4527
                next;
4528
            }
4529
        } elsif ($line =~ m{^(\s*)(/|-#)\s*$}) {
4530
            if ($1) {
4531
                $in_comment = length $1 + 1; # number of leading spaces + 1
4532
            } else {
4533
                $in_comment = 1;
4534
            }
4535
            # print "in_comment=$in_comment\n";
4536
            next;
4537
        }
4538
        push @save_lines, $line;
4539
    }
4540
 
4541
    print "<- remove_haml_block\n" if $opt_v > 2;
4542
    return @save_lines;
4543
} # 1}}}
4544
sub add_newlines {                           # {{{1
4545
    my ($ra_lines, ) = @_;
4546
    print "-> add_newlines \n" if $opt_v > 2;
4547
 
4548
    my @save_lines = ();
4549
    foreach (@{$ra_lines}) {
4550
 
4551
        push @save_lines, "$_\n";
4552
    }
4553
 
4554
    print "<- add_newlines \n" if $opt_v > 2;
4555
    return @save_lines;
4556
} # 1}}}
4557
sub docstring_to_C {                         # {{{1
4558
    my ($ra_lines, ) = @_;
4559
    # Converts Python docstrings to C comments.
4560
 
4561
    print "-> docstring_to_C()\n" if $opt_v > 2;
4562
 
4563
    my $in_docstring = 0;
4564
    foreach (@{$ra_lines}) {
4565
        while (/"""/) {
4566
            if (!$in_docstring) {
4567
                s{[uU]?"""}{/*};
4568
                $in_docstring = 1;
4569
            } else {
4570
                s{"""}{*/};
4571
                $in_docstring = 0;
4572
            }
4573
        }
4574
    }
4575
 
4576
    print "<- docstring_to_C\n" if $opt_v > 2;
4577
    return @{$ra_lines};
4578
} # 1}}}
4579
sub powershell_to_C {                        # {{{1
4580
    my ($ra_lines, ) = @_;
4581
    # Converts PowerShell block comment markers to C comments.
4582
 
4583
    print "-> powershell_to_C()\n" if $opt_v > 2;
4584
 
4585
    my $in_docstring = 0;
4586
    foreach (@{$ra_lines}) {
4587
        s{<#}{/*}g;
4588
        s{#>}{*/}g;
4589
    }
4590
 
4591
    print "<- powershell_to_C\n" if $opt_v > 2;
4592
    return @{$ra_lines};
4593
} # 1}}}
4594
sub smarty_to_C {                            # {{{1
4595
    my ($ra_lines, ) = @_;
4596
    # Converts Smarty comments to C comments.
4597
 
4598
    print "-> smarty_to_C()\n" if $opt_v > 2;
4599
 
4600
    foreach (@{$ra_lines}) {
4601
        s[{\*][/*]g;
4602
        s[\*}][*/]g;
4603
    }
4604
 
4605
    print "<- smarty_to_C\n" if $opt_v > 2;
4606
    return @{$ra_lines};
4607
} # 1}}}
4608
sub determine_lit_type {                     # {{{1
4609
  my ($file) = @_;
4610
 
4611
  open (FILE, $file);
4612
  while (<FILE>) {
4613
    if (m/^\\begin\{code\}/) { close FILE; return 2; }
4614
    if (m/^>\s/) { close FILE; return 1; }
4615
  }
4616
 
4617
  return 0;
4618
} # 1}}}
4619
sub remove_haskell_comments {                # {{{1
4620
    # Bulk of code taken from SLOCCount's haskell_count script.
4621
    # Strips out {- .. -} and -- comments and counts the rest.
4622
    # Pragmas, {-#...}, are counted as SLOC.
4623
    # BUG: Doesn't handle strings with embedded block comment markers gracefully.
4624
    #      In practice, that shouldn't be a problem.
4625
    my ($ra_lines, $file, ) = @_;
4626
 
4627
    print "-> remove_haskell_comments\n" if $opt_v > 2;
4628
 
4629
    my @save_lines = ();
4630
    my $in_comment = 0;
4631
    my $incomment  = 0;
4632
    my ($literate, $inlitblock) = (0,0);
4633
 
4634
    $literate = 1 if $file =~ /\.lhs$/;
4635
    if($literate) { $literate = determine_lit_type($file) }
4636
 
4637
    foreach (@{$ra_lines}) {
4638
        if ($literate == 1) {
4639
            if (!s/^>//) { s/.*//; }
4640
        } elsif ($literate == 2) {
4641
            if ($inlitblock) {
4642
                if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
4643
            } elsif (!$inlitblock) {
4644
                if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
4645
                else { s/.*//; }
4646
            }
4647
        }
4648
 
4649
        if ($incomment) {
4650
            if (m/\-\}/) { s/^.*?\-\}//;  $incomment = 0;}
4651
            else { s/.*//; }
4652
        }
4653
        if (!$incomment) {
4654
            s/--.*//;
4655
            s!{-[^#].*?-}!!g;
4656
            if (m/{-/ && (!m/{-#/)) {
4657
              s/{-.*//;
4658
              $incomment = 1;
4659
            }
4660
        }
4661
        if (m/\S/) { push @save_lines, $_; }
4662
    }
4663
#   if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
4664
 
4665
    print "<- remove_haskell_comments\n" if $opt_v > 2;
4666
    return @save_lines;
4667
} # 1}}}
4668
sub print_lines {                            # {{{1
4669
    my ($file     , # in
4670
        $title    , # in
4671
        $ra_lines , # in
4672
       ) = @_;
4673
    printf "->%-30s %s\n", $file, $title;
4674
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
4675
        printf "%5d | %s", $i+1, $ra_lines->[$i];
4676
        print "\n" unless $ra_lines->[$i] =~ m{\n$}
4677
    }
4678
} # 1}}}
4679
sub set_constants {                          # {{{1
4680
    my ($rh_Language_by_Extension , # out
4681
        $rh_Language_by_Script    , # out
4682
        $rh_Language_by_File      , # out
4683
        $rhaa_Filters_by_Language , # out
4684
        $rh_Not_Code_Extension    , # out
4685
        $rh_Not_Code_Filename     , # out
4686
        $rh_Scale_Factor          , # out
4687
        $rh_Known_Binary_Archives , # out
4688
        $rh_EOL_continuation_re   , # out
4689
       ) = @_;
4690
# 1}}}
4691
%{$rh_Language_by_Extension} = (             # {{{1
4692
            'abap'        => 'ABAP'                  ,
4693
            'ac'          => 'm4'                    ,
4694
            'ada'         => 'Ada'                   ,
4695
            'adb'         => 'Ada'                   ,
4696
            'ads'         => 'Ada'                   ,
4697
            'adso'        => 'ADSO/IDSM'             ,
4698
            'ahk'         => 'AutoHotkey'            ,
4699
            'am'          => 'make'                  ,
4700
            'ample'       => 'AMPLE'                 ,
4701
            'as'          => 'ActionScript'          ,
4702
            'dofile'      => 'AMPLE'                 ,
4703
            'startup'     => 'AMPLE'                 ,
4704
            'asa'         => 'ASP'                   ,
4705
            'asax'        => 'ASP.Net'               ,
4706
            'ascx'        => 'ASP.Net'               ,
4707
            'asm'         => 'Assembly'              ,
4708
            'asmx'        => 'ASP.Net'               ,
4709
            'asp'         => 'ASP'                   ,
4710
            'aspx'        => 'ASP.Net'               ,
4711
            'master'      => 'ASP.Net'               ,
4712
            'sitemap'     => 'ASP.Net'               ,
4713
            'cshtml'      => 'Razor'                 ,
4714
            'awk'         => 'awk'                   ,
4715
            'bash'        => 'Bourne Again Shell'    ,
4716
            'bas'         => 'Visual Basic'          ,
4717
            'dxl'         => 'DOORS Extension Language',
4718
            'bat'         => 'DOS Batch'             ,
4719
            'BAT'         => 'DOS Batch'             ,
4720
            'cmd'         => 'DOS Batch'             ,
4721
            'CMD'         => 'DOS Batch'             ,
4722
            'btm'         => 'DOS Batch'             ,
4723
            'BTM'         => 'DOS Batch'             ,
4724
            'build.xml'   => 'Ant'                   ,
4725
            'cbl'         => 'COBOL'                 ,
4726
            'CBL'         => 'COBOL'                 ,
4727
            'c'           => 'C'                     ,
4728
            'C'           => 'C++'                   ,
4729
            'cc'          => 'C++'                   ,
4730
            'c++'         => 'C++'                   ,
4731
            'ccs'         => 'CCS'                   ,
4732
            'cfc'         => 'ColdFusion CFScript'   ,
4733
            'cfm'         => 'ColdFusion'            ,
4734
            'cl'          => 'Lisp/OpenCL'           ,
4735
            'clj'         => 'Clojure'               ,
4736
            'cljs'        => 'ClojureScript'         ,
4737
            'cls'         => 'Visual Basic'          , # also Apex Class
4738
            'CMakeLists.txt' => 'CMake'              ,
4739
            'cmake'       => 'CMake'                 ,
4740
            'cob'         => 'COBOL'                 ,
4741
            'COB'         => 'COBOL'                 ,
4742
            'coffee'      => 'CoffeeScript'          ,
4743
            'component'   => 'Visualforce Component' ,
4744
            'cpp'         => 'C++'                   ,
4745
            'cs'          => 'C#'                    ,
4746
            'csh'         => 'C Shell'               ,
4747
            'css'         => "CSS"                   ,
4748
            'ctl'         => 'Visual Basic'          ,
4749
            'cu'          => 'CUDA'                  ,
4750
            'cxx'         => 'C++'                   ,
4751
            'd'           => 'D/dtrace'              ,
4752
# in addition, .d can map to init.d files typically written as 
4753
# bash or sh scripts
4754
            'da'          => 'DAL'                   ,
4755
            'dart'        => 'Dart'                  ,
4756
            'def'         => 'Windows Module Definition',
4757
            'diff'        => 'diff'                  ,
4758
            'dmap'        => 'NASTRAN DMAP'          ,
4759
            'dpr'         => 'Pascal'                ,
4760
            'dita'        => 'DITA'                  ,
4761
            'dsr'         => 'Visual Basic'          ,
4762
            'dtd'         => 'DTD'                   ,
4763
            'ec'          => 'C'                     ,
4764
            'ecpp'        => 'ECPP'                  ,
4765
            'el'          => 'Lisp'                  ,
4766
            'exs'         => 'Elixir'                ,
4767
            'ex'          => 'Elixir'                ,
4768
            'erb'         => 'ERB'                   ,
4769
            'ERB'         => 'ERB'                   ,
4770
            'erl'         => 'Erlang'                ,
4771
            'exp'         => 'Expect'                ,
4772
            'f77'         => 'Fortran 77'            ,
4773
            'F77'         => 'Fortran 77'            ,
4774
            'f90'         => 'Fortran 90'            ,
4775
            'F90'         => 'Fortran 90'            ,
4776
            'f95'         => 'Fortran 95'            ,
4777
            'F95'         => 'Fortran 95'            ,
4778
            'f'           => 'Fortran 77'            ,
4779
            'F'           => 'Fortran 77'            ,
4780
            'for'         => 'Fortran 77'            ,
4781
            'FOR'         => 'Fortran 77'            ,
4782
            'ftn'         => 'Fortran 77'            ,
4783
            'FTN'         => 'Fortran 77'            ,
4784
            'fmt'         => 'Oracle Forms'          ,
4785
            'focexec'     => 'Focus'                 ,
4786
            'frm'         => 'Visual Basic'          ,
4787
            'fs'          => 'F#'                    ,
4788
            'fsi'         => 'F#'                    ,
4789
            'gnumakefile' => 'make'                  ,
4790
            'Gnumakefile' => 'make'                  ,
4791
            'go'          => 'Go'                    ,
4792
            'gsp'         => 'Grails'                ,
4793
            'groovy'      => 'Groovy'                ,
4794
            'gant'        => 'Groovy'                ,
4795
            'gradle'      => 'Groovy'                ,
4796
            'h'           => 'C/C++ Header'          ,
4797
            'H'           => 'C/C++ Header'          ,
4798
            'hh'          => 'C/C++ Header'          ,
4799
            'hpp'         => 'C/C++ Header'          ,
4800
            'hb'          => 'Harbour'               ,
4801
            'hrl'         => 'Erlang'                ,
4802
            'hs'          => 'Haskell'               , 
4803
            'hlsl'        => 'HLSL'                  ,
4804
            'shader'      => 'HLSL'                  ,
4805
            'cg'          => 'HLSL'                  ,
4806
            'cginc'       => 'HLSL'                  ,
4807
            'haml'        => 'Haml'                  ,
4808
            'handlebars'  => 'Handlebars'            ,
4809
            'hbs'         => 'Handlebars'            ,
4810
            'htm'         => 'HTML'                  ,
4811
            'html'        => 'HTML'                  ,
4812
            'i3'          => 'Modula3'               ,
4813
            'idl'         => 'IDL'                   ,
4814
            'ism'         => 'InstallShield'         ,
4815
            'pro'         => 'IDL/Qt Project/Prolog' ,
4816
            'ig'          => 'Modula3'               ,
4817
            'il'          => 'SKILL'                 ,
4818
            'ils'         => 'SKILL++'               ,
4819
            'inc'         => 'PHP/Pascal'            , # might be PHP or Pascal
4820
            'ino'         => 'Arduino Sketch'        ,
4821
            'pde'         => 'Arduino Sketch'        , # pre 1.0
4822
            'itk'         => 'Tcl/Tk'                ,
4823
            'java'        => 'Java'                  ,
4824
            'jcl'         => 'JCL'                   , # IBM Job Control Lang.
4825
            'jl'          => 'Lisp/Julia'            ,
4826
            'js'          => 'Javascript'            ,
4827
            'jsf'         => 'JavaServer Faces'      ,
4828
            'xhtml'       => 'JavaServer Faces'      ,
4829
            'json'        => 'JSON'                  ,
4830
            'jsp'         => 'JSP'                   , # Java server pages
4831
            'jspf'        => 'JSP'                   , # Java server pages
4832
            'vm'          => 'Velocity Template Language' ,
4833
            'ksc'         => 'Kermit'                ,
4834
            'ksh'         => 'Korn Shell'            ,
4835
            'kt'          => 'Kotlin'                ,
4836
            'lhs'         => 'Haskell'               ,
4837
            'l'           => 'lex'                   ,
4838
            'less'        => 'LESS'                  ,
4839
            'lsp'         => 'Lisp'                  ,
4840
            'lisp'        => 'Lisp'                  ,
4841
            'lua'         => 'Lua'                   ,
4842
            'm3'          => 'Modula3'               ,
4843
            'm4'          => 'm4'                    ,
4844
            'makefile'    => 'make'                  ,
4845
            'Makefile'    => 'make'                  ,
4846
            'mc'          => 'Windows Message File'  ,
4847
            'met'         => 'Teamcenter met'        ,
4848
            'mg'          => 'Modula3'               , 
4849
#           'mli'         => 'ML'                    , # ML not implemented
4850
#           'ml'          => 'ML'                    , 
4851
            'ml'          => 'OCaml'                 , 
4852
            'mli'         => 'OCaml'                 , 
4853
            'mly'         => 'OCaml'                 , 
4854
            'mll'         => 'OCaml'                 , 
4855
            'm'           => 'MATLAB/Objective C/MUMPS/Mercury' ,
4856
            'mm'          => 'Objective C++'         ,
4857
            'mustache'    => 'Mustache'              ,
4858
            'wdproj'      => 'MSBuild script'        ,
4859
            'csproj'      => 'MSBuild script'        ,
4860
            'vcproj'      => 'MSBuild script'        ,
4861
            'wixproj'     => 'MSBuild script'        ,
4862
            'vbproj'      => 'MSBuild script'        ,
4863
            'mps'         => 'MUMPS'                 ,
4864
            'mth'         => 'Teamcenter mth'        ,
4865
            'oscript'     => 'LiveLink OScript'      ,
4866
            'pad'         => 'Ada'                   , # Oracle Ada preprocessor
4867
            'page'        => 'Visualforce Page'      ,
4868
            'pas'         => 'Pascal'                ,
4869
            'pcc'         => 'C++'                   , # Oracle C++ preprocessor
4870
            'perl'        => 'Perl'                  ,
4871
            'pfo'         => 'Fortran 77'            ,
4872
            'pgc'         => 'C'                     , # Postgres embedded C/C++
4873
            'php3'        => 'PHP'                   ,
4874
            'php4'        => 'PHP'                   ,
4875
            'php5'        => 'PHP'                   ,
4876
            'php'         => 'PHP'                   ,
4877
            'pig'         => 'Pig Latin'             ,
4878
            'plh'         => 'Perl'                  ,
4879
            'pl'          => 'Perl/Prolog'           ,
4880
            'PL'          => 'Perl/Prolog'           ,
4881
            'plx'         => 'Perl'                  ,
4882
            'pm'          => 'Perl'                  ,
4883
            'pom.xml'     => 'Maven'                 ,
4884
            'pom'         => 'Maven'                 ,
4885
            'P'           => 'Prolog'                ,
4886
            'p'           => 'Pascal'                ,
4887
            'pp'          => 'Pascal/Puppet'         ,
4888
            'psql'        => 'SQL'                   ,
4889
            'py'          => 'Python'                ,
4890
            'pyx'         => 'Cython'                ,
4891
            'qml'         => 'QML'                   ,
4892
            'rb'          => 'Ruby'                  ,
4893
            'rake'        => 'Ruby'                  ,
4894
         #  'resx'        => 'ASP.Net'               ,
4895
            'rex'         => 'Oracle Reports'        ,
4896
            'rexx'        => 'Rexx'                  ,
4897
            'rhtml'       => 'Ruby HTML'             ,
4898
            'rs'          => 'Rust'                  ,
4899
            's'           => 'Assembly'              ,
4900
            'S'           => 'Assembly'              ,
4901
            'SCA'         => 'Visual Fox Pro'        ,
4902
            'sca'         => 'Visual Fox Pro'        ,
4903
            'scala'       => 'Scala'                 ,
4904
            'sbl'         => 'Softbridge Basic'      ,
4905
            'SBL'         => 'Softbridge Basic'      ,
4906
            'sc'          => 'Lisp'                  ,
4907
            'scm'         => 'Lisp'                  ,
4908
            'sed'         => 'sed'                   ,
4909
            'ses'         => 'Patran Command Language'   ,
4910
            'pcl'         => 'Patran Command Language'   ,
4911
            'pl1'         => 'PL/I'                  ,
4912
            'purs'        => 'PureScript'            ,
4913
            'prefab'      => 'Unity-Prefab'          ,
4914
            'proto'       => 'Protocol Buffers'      ,
4915
            'mat'         => 'Unity-Prefab'          ,
4916
            'ps1'         => 'PowerShell'            ,
4917
            'R'           => 'R'                     ,
4918
            'rkt'         => 'Racket'                ,
4919
            'rktl'        => 'Racket'                ,
4920
            'ss'          => 'Racket'                ,
4921
            'scm'         => 'Racket'                ,
4922
            'sch'         => 'Racket'                ,
4923
            'scrbl'       => 'Racket'                ,
4924
            'tsv'         => 'RobotFramework'        ,
4925
            'robot'       => 'RobotFramework'        ,
4926
            'rc'          => 'Windows Resource File' ,
4927
            'rc2'         => 'Windows Resource File' ,
4928
            'sas'         => 'SAS'                   ,
4929
            'sass'        => 'SASS'                  ,
4930
            'scss'        => 'SASS'                  ,
4931
            'sh'          => 'Bourne Shell'          ,
4932
            'smarty'      => 'Smarty'                ,
4933
            'sml'         => 'Standard ML'           ,
4934
            'sig'         => 'Standard ML'           ,
4935
            'fun'         => 'Standard ML'           ,
4936
            'sql'         => 'SQL'                   ,
4937
            'SQL'         => 'SQL'                   ,
4938
            'sproc.sql'   => 'SQL Stored Procedure'  ,
4939
            'spoc.sql'    => 'SQL Stored Procedure'  ,
4940
            'spc.sql'     => 'SQL Stored Procedure'  ,
4941
            'udf.sql'     => 'SQL Stored Procedure'  ,
4942
            'data.sql'    => 'SQL Data'              ,
4943
            'v'           => 'Verilog-SystemVerilog' ,
4944
            'sv'          => 'Verilog-SystemVerilog' ,
4945
            'svh'         => 'Verilog-SystemVerilog' ,
4946
            'tcl'         => 'Tcl/Tk'                ,
4947
            'tcsh'        => 'C Shell'               ,
4948
            'tk'          => 'Tcl/Tk'                ,
4949
            'tpl'         => 'Smarty'                ,
4950
            'trigger'     => 'Apex Trigger'          ,
4951
            'ts'          => 'TypeScript'            ,
4952
            'tss'         => 'Titanium Style Sheet'  ,
4953
            'vala'        => 'Vala'                  ,
4954
            'vapi'        => 'Vala Header'           ,
4955
            'vhd'         => 'VHDL'                  ,
4956
            'VHD'         => 'VHDL'                  ,
4957
            'vhdl'        => 'VHDL'                  ,
4958
            'VHDL'        => 'VHDL'                  ,
4959
            'vba'         => 'Visual Basic'          ,
4960
            'VBA'         => 'Visual Basic'          ,
4961
         #  'vbp'         => 'Visual Basic'          , # .vbp - autogenerated
4962
            'vb'          => 'Visual Basic'          ,
4963
            'VB'          => 'Visual Basic'          ,
4964
         #  'vbw'         => 'Visual Basic'          , # .vbw - autogenerated
4965
            'vbs'         => 'Visual Basic'          ,
4966
            'VBS'         => 'Visual Basic'          ,
4967
            'webinfo'     => 'ASP.Net'               ,
4968
            'xml'         => 'XML'                   ,
4969
            'XML'         => 'XML'                   ,
4970
            'mxml'        => 'MXML'                  ,
4971
            'build'       => 'NAnt script'           ,
4972
            'vim'         => 'vim script'            ,
4973
            'swift'       => 'Swift'                 ,
4974
            'xaml'        => 'XAML'                  ,
4975
            'wxs'         => 'WiX source'            ,
4976
            'wxi'         => 'WiX include'           ,
4977
            'wxl'         => 'WiX string localization' ,
4978
            'prg'         => 'xBase'                 ,
4979
            'ch'          => 'xBase Header'          ,
4980
            'xq'          => 'XQuery'                ,
4981
            'xquery'      => 'XQuery'                ,
4982
            'xsd'         => 'XSD'                   ,
4983
            'XSD'         => 'XSD'                   ,
4984
            'xslt'        => 'XSLT'                  ,
4985
            'XSLT'        => 'XSLT'                  ,
4986
            'xsl'         => 'XSLT'                  ,
4987
            'XSL'         => 'XSLT'                  ,
4988
            'y'           => 'yacc'                  ,
4989
            'yaml'        => 'YAML'                  ,
4990
            'yml'         => 'YAML'                  ,
4991
            );
4992
# 1}}}
4993
%{$rh_Language_by_Script}    = (             # {{{1
4994
            'awk'      => 'awk'                   ,
4995
            'bash'     => 'Bourne Again Shell'    ,
4996
            'bc'       => 'bc'                    ,# calculator
4997
            'csh'      => 'C Shell'               ,
4998
            'dmd'      => 'D'                     ,
4999
            'dtrace'   => 'dtrace'                ,
5000
            'idl'      => 'IDL'                   ,
5001
            'kermit'   => 'Kermit'                ,
5002
            'ksh'      => 'Korn Shell'            ,
5003
            'lua'      => 'Lua'                   ,
5004
            'make'     => 'make'                  ,
5005
            'octave'   => 'Octave'                ,
5006
            'perl5'    => 'Perl'                  ,
5007
            'perl'     => 'Perl'                  ,
5008
            'php'      => 'PHP'                   ,
5009
            'php5'     => 'PHP'                   ,
5010
            'python'   => 'Python'                ,
5011
            'python2.6'=> 'Python'                ,
5012
            'python2.7'=> 'Python'                ,
5013
            'python3'  => 'Python'                ,
5014
            'python3.3'=> 'Python'                ,
5015
            'python3.4'=> 'Python'                ,
5016
            'rexx'     => 'Rexx'                  ,
5017
            'regina'   => 'Rexx'                  ,
5018
            'ruby'     => 'Ruby'                  ,
5019
            'sed'      => 'sed'                   ,
5020
            'sh'       => 'Bourne Shell'          ,
5021
            'swipl'    => 'Prolog'                ,
5022
            'tcl'      => 'Tcl/Tk'                ,
5023
            'tclsh'    => 'Tcl/Tk'                ,
5024
            'tcsh'     => 'C Shell'               ,
5025
            'wish'     => 'Tcl/Tk'                ,
5026
            );
5027
# 1}}}
5028
%{$rh_Language_by_File}      = (             # {{{1
5029
            'Makefile'       => 'make'               ,
5030
            'makefile'       => 'make'               ,
5031
            'gnumakefile'    => 'make'               ,
5032
            'Gnumakefile'    => 'make'               ,
5033
            'CMakeLists.txt' => 'CMake'              ,
5034
            'build.xml'      => 'Ant/XML'            ,
5035
            'pom.xml'        => 'Maven/XML'          ,
5036
            'Rakefile'       => 'Ruby'               ,
5037
            'rakefile'       => 'Ruby'               ,
5038
            );
5039
# 1}}}
5040
%{$rhaa_Filters_by_Language} = (             # {{{1
5041
    '(unknown)'          => [ ],
5042
    'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
5043
    'ActionScript'       => [   
5044
                                [ 'remove_matches'      , '^\s*//' ], 
5045
                                [ 'call_regexp_common'  , 'C'      ],
5046
                            ],
5047
 
5048
    'ASP'                => [   [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5049
    'ASP.Net'            => [   [ 'call_regexp_common'  , 'C'      ], ],
5050
    'Ada'                => [   [ 'remove_matches'      , '^\s*--' ], ],
5051
    'ADSO/IDSM'          => [   [ 'remove_matches'      , '^\s*\*[\+\!]' ], ],
5052
    'AMPLE'              => [   [ 'remove_matches'      , '^\s*//' ], ],
5053
    'Ant/XML'            => [
5054
                                [ 'remove_html_comments',          ],
5055
                                [ 'call_regexp_common'  , 'HTML'   ], 
5056
                            ],
5057
    'Ant'                => [
5058
                                [ 'remove_html_comments',          ],
5059
                                [ 'call_regexp_common'  , 'HTML'   ], 
5060
                            ],
5061
    'Apex Trigger'       => [
5062
                                [ 'remove_matches'      , '^\s*//' ],
5063
                                [ 'call_regexp_common'  , 'C'      ], 
5064
                                [ 'remove_inline'       , '//.*$'  ],
5065
                            ],
5066
    'Arduino Sketch'     => [   # same as C
5067
                                [ 'remove_matches'      , '^\s*//' ],
5068
                                [ 'call_regexp_common'  , 'C'      ], 
5069
                                [ 'remove_inline'       , '//.*$'  ],
5070
                            ], 
5071
    'Assembly'           => [  
5072
                                [ 'remove_matches'      , '^\s*//' ],
5073
                                [ 'remove_matches'      , '^\s*;'  ],
5074
                                [ 'remove_matches'      , '^\s*\@' ], 
5075
                                [ 'remove_matches'      , '^\s*\|' ], 
5076
                                [ 'remove_matches'      , '^\s*!'  ], 
5077
                                [ 'remove_matches'      , '^\s*#'  ], 
5078
                                [ 'remove_matches'      , '^\s*--' ], 
5079
                                [ 'call_regexp_common'  , 'C'      ], 
5080
                                [ 'remove_inline'       , '//.*$'  ], 
5081
                                [ 'remove_inline'       , ';.*$'   ], 
5082
                                [ 'remove_inline'       , '\@.*$'  ], 
5083
                                [ 'remove_inline'       , '\|.*$'  ], 
5084
                                [ 'remove_inline'       , '!.*$'   ], 
5085
                                [ 'remove_inline'       , '#.*$'   ], 
5086
                                [ 'remove_inline'       , '--.*$'  ], 
5087
                            ],
5088
    'AutoHotkey'         => [   
5089
                                [ 'remove_matches'      , '^\s*;'  ],
5090
                                [ 'remove_inline'       , ';.*$'   ],
5091
                            ],
5092
    'awk'                => [   
5093
                                [ 'remove_matches'      , '^\s*#'  ], 
5094
                                [ 'remove_inline'       , '#.*$'   ],
5095
                            ], 
5096
    'bc'                 => [   
5097
                                [ 'remove_matches'      , '^\s*#'  ], 
5098
                                [ 'remove_inline'       , '#.*$'   ],
5099
                            ], 
5100
    'Bourne Again Shell' => [   
5101
                                [ 'remove_matches'      , '^\s*#'  ], 
5102
                                [ 'remove_inline'       , '#.*$'   ],
5103
                            ], 
5104
    'Bourne Shell'       => [   
5105
                                [ 'remove_matches'      , '^\s*#'  ], 
5106
                                [ 'remove_inline'       , '#.*$'   ],
5107
                            ], 
5108
    'C'                  => [   
5109
                                [ 'remove_matches'      , '^\s*//' ], # C99
5110
                                [ 'call_regexp_common'  , 'C'      ], 
5111
                                [ 'remove_inline'       , '//.*$'  ], # C99
5112
                            ], 
5113
    'C++'                => [   
5114
                                [ 'remove_matches'      , '^\s*//' ], 
5115
                                [ 'remove_inline'       , '//.*$'  ], 
5116
                                [ 'call_regexp_common'  , 'C'      ],
5117
                            ],
5118
    'C/C++ Header'       => [   
5119
                                [ 'remove_matches'      , '^\s*//' ], 
5120
                                [ 'call_regexp_common'  , 'C'      ], 
5121
                                [ 'remove_inline'       , '//.*$'  ], 
5122
                            ],
5123
    'Clojure'            => [   [ 'remove_matches'      , '^\s*;'  ], ],
5124
    'ClojureScript'      => [   [ 'remove_matches'      , '^\s*;'  ], ],
5125
    'CMake'              => [   
5126
                                [ 'remove_matches'      , '^\s*#'  ],
5127
                                [ 'remove_inline'       , '#.*$'   ], 
5128
                            ],
5129
    'CUDA'               => [   
5130
                                [ 'remove_matches'      , '^\s*//' ], 
5131
                                [ 'remove_inline'       , '//.*$'  ], 
5132
                                [ 'call_regexp_common'  , 'C'      ],
5133
                            ],
5134
    'Cython'             => [   
5135
                                [ 'remove_matches'      , '^\s*#'  ], 
5136
                                [ 'docstring_to_C'                 ], 
5137
                                [ 'call_regexp_common'  , 'C'      ],
5138
                                [ 'remove_inline'       , '#.*$'   ],
5139
                            ], 
5140
    'C#'                 => [   
5141
                                [ 'remove_matches'      , '^\s*//' ], 
5142
                                [ 'call_regexp_common'  , 'C'      ],
5143
                                [ 'remove_inline'       , '//.*$'  ], 
5144
                            ],
5145
    'CCS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5146
    'CSS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5147
    'COBOL'              => [   [ 'remove_cobol_comments',         ], ],
5148
    'CoffeeScript'       => [   
5149
                                [ 'remove_matches'      , '^\s*#'  ],
5150
                                [ 'remove_inline'       , '#.*$'   ], 
5151
                            ],
5152
    'ColdFusion'         => [   [ 'remove_html_comments',          ],
5153
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5154
    'ColdFusion CFScript'=> [
5155
                                [ 'remove_matches'      , '^\s*//' ], 
5156
                                [ 'call_regexp_common'  , 'C'      ],
5157
                                [ 'remove_inline'       , '//.*$'  ], 
5158
                            ],
5159
    'Crystal Reports'    => [   [ 'remove_matches'      , '^\s*//' ], ],
5160
    'D/dtrace'           => [ [ 'die' ,          ], ], # never called
5161
    'D'                  => [   
5162
                                [ 'remove_matches'      , '^\s*//' ], 
5163
                                [ 'remove_between_general', '/+', '+/' ],
5164
                                [ 'call_regexp_common'  , 'C'      ],
5165
                                [ 'remove_inline'       , '//.*$'  ], 
5166
                            ],
5167
    'DAL'                => [
5168
                                [ 'remove_between_general', '[', ']', ],
5169
                            ],
5170
    'Dart'               => [   
5171
                                [ 'remove_matches'      , '^\s*//' ], 
5172
                                [ 'remove_inline'       , '//.*$'  ], 
5173
                                [ 'call_regexp_common'  , 'C'      ],
5174
                            ],
5175
    # diff is kind of weird: anything but a space in the first column
5176
    # will count as code, with the exception of #, ---, +++.  Spaces
5177
    # in the first column denote context lines which aren't part of the
5178
    # difference.
5179
    'diff'               => [   
5180
                                [ 'remove_matches'      , '^#' ], 
5181
                                [ 'remove_matches'      , '^\-\-\-' ], 
5182
                                [ 'remove_matches'      , '^\+\+\+' ], 
5183
                                [ 'remove_matches'      , '^\s' ], 
5184
                            ],
5185
    'DITA'               => [   
5186
                                [ 'remove_html_comments',          ],
5187
                                [ 'call_regexp_common'  , 'HTML'   ],
5188
                            ],
5189
    'DOORS Extension Language' => [
5190
                                [ 'remove_matches'      , '^\s*//' ], 
5191
                                [ 'remove_inline'       , '//.*$'  ], 
5192
                                [ 'call_regexp_common'  , 'C'      ],
5193
                            ],
5194
    'dtrace'             => [   
5195
                                [ 'remove_matches'      , '^\s*#'  ], 
5196
                                [ 'remove_inline'       , '#.*$'   ],
5197
                            ], 
5198
    'ECPP'               => [   
5199
                                [ 'remove_between_general', 
5200
                                  '<%doc>', '</%doc>',             ],
5201
                                [ 'remove_between_general', 
5202
                                  '<#'    , '#>'     ,             ],
5203
                                [ 'call_regexp_common'  , 'HTML'   ], 
5204
                            ],
5205
    'ERB'                => [   
5206
                                [ 'remove_between_general', '<%#', '%>' ],
5207
                            ],
5208
    'NASTRAN DMAP'       => [   
5209
                                [ 'remove_matches'      , '^\s*\$' ], 
5210
                                [ 'remove_inline'       , '\$.*$'  ], 
5211
                            ],
5212
    'DOS Batch'          => [   [ 'remove_matches'      , '^\s*rem', ], ],
5213
    'DTD'                => [   [ 'remove_html_comments',          ],
5214
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5215
    'Elixir'             => [
5216
                                [ 'remove_matches'      , '^\s*#'  ],
5217
                                [ 'remove_inline'       , '#.*$'   ],
5218
                            ],
5219
    'Erlang'             => [   
5220
                                [ 'remove_matches'      , '^\s*%'  ], 
5221
                                [ 'remove_inline'       , '%.*$'   ],
5222
                            ],
5223
    'Expect'             => [   
5224
                                [ 'remove_matches'      , '^\s*#'  ], 
5225
                                [ 'remove_inline'       , '#.*$'   ],
5226
                            ], 
5227
    'Focus'              => [   [ 'remove_matches'      , '^\s*\-\*'  ], ],
5228
    'Fortran 77'         => [   
5229
                                [ 'remove_f77_comments' ,          ], 
5230
                                [ 'remove_inline'       , '\!.*$'  ],
5231
                            ],
5232
    'Fortran 90'         => [   
5233
                                [ 'remove_f77_comments' ,          ],
5234
                                [ 'remove_f90_comments' ,          ], 
5235
                                [ 'remove_inline'       , '\!.*$'  ],
5236
                            ],
5237
    'Fortran 95'         => [   
5238
                                [ 'remove_f77_comments' ,          ],
5239
                                [ 'remove_f90_comments' ,          ], 
5240
                                [ 'remove_inline'       , '\!.*$'  ],
5241
                            ],
5242
    'F#'                 => [   
5243
                                [ 'call_regexp_common'  , 'Pascal' ], 
5244
                                [ 'remove_matches'      , '^\s*//' ],
5245
                            ],
5246
    'Go'                 => [   
5247
                                [ 'remove_matches'      , '^\s*//' ], 
5248
                                [ 'remove_inline'       , '//.*$'  ], 
5249
                                [ 'call_regexp_common'  , 'C'      ],
5250
                            ],
5251
    'Grails'             => [   
5252
                                [ 'remove_html_comments',          ],
5253
                                [ 'call_regexp_common'  , 'HTML'   ],
5254
                                [ 'remove_jsp_comments' ,          ], 
5255
                                [ 'remove_matches'      , '^\s*//' ],
5256
                                [ 'add_newlines'        ,          ],
5257
                                [ 'call_regexp_common'  , 'C'      ],
5258
                            ],
5259
    'Groovy'             => [   
5260
                                [ 'remove_matches'      , '^\s*//' ], 
5261
                                [ 'remove_inline'       , '//.*$'  ], 
5262
                                [ 'call_regexp_common'  , 'C'      ],
5263
                            ],
5264
    'Handlebars'         => [
5265
                                [ 'remove_between_general', '{{!--', '--}}' ],
5266
                                [ 'remove_between_general', '{{!', '}}' ],
5267
                                [ 'remove_html_comments',          ],
5268
                            ],
5269
    'Harbour'            => [
5270
                                [ 'remove_matches'      , '^\s*//' ], 
5271
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5272
                                [ 'remove_matches'      , '^\s*\*' ], 
5273
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5274
                                [ 'remove_matches'      , '^\s*note' ], 
5275
                                [ 'remove_matches'      , '^\s*Note' ], 
5276
                                [ 'remove_inline'       , '//.*$'  ], 
5277
                                [ 'remove_inline'       , '\&\&.*$' ], 
5278
                                [ 'call_regexp_common'  , 'C'      ],
5279
                            ],
5280
    'HLSL'               => [   
5281
                                [ 'remove_matches'      , '^\s*//' ], 
5282
                                [ 'remove_inline'       , '//.*$'  ], 
5283
                                [ 'call_regexp_common'  , 'C'      ],
5284
                            ],
5285
    'Haml'               => [   
5286
                                [ 'remove_haml_block'   ,          ], 
5287
                                [ 'remove_html_comments',          ],
5288
                                [ 'remove_matches'      , '^\s*/\s*\S+' ], 
5289
                                [ 'remove_matches'      , '^\s*-#\s*\S+' ], 
5290
                            ],
5291
    'HTML'               => [   
5292
                                [ 'remove_html_comments',          ],
5293
                                [ 'call_regexp_common'  , 'HTML'   ], 
5294
                            ],
5295
    'Haskell'            => [   [ 'remove_haskell_comments', '>filename<' ], ],
5296
    'IDL'                => [   [ 'remove_matches'      , '^\s*;'  ], ],
5297
    'IDL/Qt Project/Prolog' => [ [ 'die' ,          ], ], # never called
5298
    'InstallShield'      => [   [ 'remove_html_comments',          ],
5299
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5300
    'JSP'                => [   [ 'remove_html_comments',          ],
5301
                                [ 'call_regexp_common'  , 'HTML'   ],
5302
                                [ 'remove_jsp_comments' ,          ], 
5303
                                [ 'remove_matches'      , '^\s*//' ],
5304
                                [ 'add_newlines'        ,          ],
5305
                                [ 'call_regexp_common'  , 'C'      ],
5306
                            ],
5307
    'JavaServer Faces'   => [   
5308
                                [ 'remove_matches'      , '^\s*//' ], 
5309
                                [ 'call_regexp_common'  , 'C'      ],
5310
                                [ 'remove_inline'       , '//.*$'  ], 
5311
                            ],
5312
    'Java'               => [   
5313
                                [ 'remove_matches'      , '^\s*//' ], 
5314
                                [ 'call_regexp_common'  , 'C'      ],
5315
                                [ 'remove_inline'       , '//.*$'  ], 
5316
                            ],
5317
    'Javascript'         => [   
5318
                                [ 'remove_matches'      , '^\s*//' ], 
5319
                                [ 'call_regexp_common'  , 'C'      ],
5320
                                [ 'remove_inline'       , '//.*$'  ], 
5321
                            ],
5322
    'JCL'                => [   [ 'remove_jcl_comments' ,          ], ],
5323
    'JSON'               => [   # ECMA-404, the JSON standard definition
5324
                                # makes no provision for JSON comments
5325
                                # so just use a placeholder filter
5326
                                [ 'remove_matches'      , '^\s*$'  ], 
5327
                            ],
5328
    'Julia'              => [
5329
                                [ 'remove_matches'      , '^\s*#'  ], 
5330
                                [ 'remove_inline'       , '#.*$'   ],
5331
                                [ 'remove_between_general', '#=', '=#' ],
5332
                            ],
5333
    'Kotlin'             => [
5334
                                [ 'remove_matches'      , '^\s*//' ], 
5335
                                [ 'remove_inline'       , '//.*$'  ], 
5336
                                [ 'remove_between_general', '/*', '*/' ],
5337
                            ],
5338
    'LESS'               => [
5339
                                [ 'remove_matches'      , '^\s*//' ], 
5340
                                [ 'call_regexp_common'  , 'C'      ],
5341
                                [ 'remove_inline'       , '//.*$'  ], 
5342
                            ],
5343
    'Lisp'               => [   
5344
                                [ 'remove_matches'      , '^\s*;'  ], 
5345
                                [ 'remove_between_general', '#|', '|#' ],
5346
                            ],
5347
    'Lisp/OpenCL'        => [ [ 'die' ,          ], ], # never called
5348
    'Lisp/Julia'         => [ [ 'die' ,          ], ], # never called
5349
    'LiveLink OScript'   => [   [ 'remove_matches'      , '^\s*//' ], ],
5350
#   'Lua'                => [   [ 'call_regexp_common'  , 'lua'    ], ],
5351
    'Lua'                => [   [ 'remove_matches'      , '^\s*\-\-' ], ],
5352
    'make'               => [   
5353
                                [ 'remove_matches'      , '^\s*#'  ], 
5354
                                [ 'remove_inline'       , '#.*$'   ],
5355
                            ], 
5356
    'MATLAB'             => [   
5357
                                [ 'remove_matches'      , '^\s*%'  ], 
5358
                                [ 'remove_inline'       , '%.*$'   ],
5359
                            ], 
5360
    'Maven/XML'          => [
5361
                                [ 'remove_html_comments',          ],
5362
                                [ 'call_regexp_common'  , 'HTML'   ], 
5363
                            ],
5364
    'Maven'              => [
5365
                                [ 'remove_html_comments',          ],
5366
                                [ 'call_regexp_common'  , 'HTML'   ], 
5367
                            ],
5368
    'Mercury'            => [   
5369
                                [ 'remove_inline'       , '%.*$'   ],
5370
                                [ 'remove_matches'      , '^\s*%'  ], 
5371
                            ], 
5372
    'Modula3'            => [   [ 'call_regexp_common'  , 'Pascal' ], ],
5373
        # Modula 3 comments are (* ... *) so applying the Pascal filter
5374
        # which also treats { ... } as a comment is not really correct.
5375
    'Objective C'        => [   
5376
                                [ 'remove_matches'      , '^\s*//' ],
5377
                                [ 'call_regexp_common'  , 'C'      ], 
5378
                                [ 'remove_inline'       , '//.*$'  ], 
5379
                            ], 
5380
    'Objective C++'      => [   
5381
                                [ 'remove_matches'      , '^\s*//' ],
5382
                                [ 'call_regexp_common'  , 'C'      ], 
5383
                                [ 'remove_inline'       , '//.*$'  ], 
5384
                            ], 
5385
    'OCaml'              => [   
5386
                                [ 'call_regexp_common'  , 'Pascal' ], 
5387
                            ],
5388
    'OpenCL'             => [   
5389
                                [ 'remove_matches'      , '^\s*//' ], # C99
5390
                                [ 'call_regexp_common'  , 'C'      ], 
5391
                                [ 'remove_inline'       , '//.*$'  ], # C99
5392
                            ],
5393
    'PHP/Pascal'               => [ [ 'die' ,          ], ], # never called
5394
    'MATLAB/Objective C/MUMPS/Mercury' => [ [ 'die' ,          ], ], # never called
5395
    'MUMPS'              => [   [ 'remove_matches'      , '^\s*;'  ], ], 
5396
    'Mustache'           => [
5397
                                [ 'remove_between_general', '{{!', '}}' ],
5398
                            ],
5399
    'Octave'             => [   
5400
                                [ 'remove_matches'      , '^\s*#'  ], 
5401
                                [ 'remove_inline'       , '#.*$'   ],
5402
                            ], 
5403
    'Oracle Forms'       => [   [ 'call_regexp_common'  , 'C'      ], ],
5404
    'Oracle Reports'     => [   [ 'call_regexp_common'  , 'C'      ], ],
5405
    'Pascal'             => [
5406
                                [ 'remove_between_regex', '{[^$]', '}' ],
5407
                                [ 'remove_between_general', '(*', '*)' ],
5408
                                [ 'remove_matches'      , '^\s*//' ],
5409
                            ],
5410
####'Pascal'             => [   
5411
####                            [ 'call_regexp_common'  , 'Pascal' ], 
5412
####                            [ 'remove_matches'      , '^\s*//' ],
5413
####                        ],
5414
    'Pascal/Puppet'            => [ [ 'die' ,          ], ], # never called
5415
    'Puppet'             => [   
5416
                                [ 'remove_matches'      , '^\s*#'   ], 
5417
                                [ 'call_regexp_common'  , 'C'       ],
5418
                                [ 'remove_inline'       , '#.*$'   ],
5419
                            ],
5420
    'PureScript'         => [   
5421
                                [ 'remove_matches'      , '^\s*--' ],
5422
                                [ 'remove_between_general', '{-', '-}' ],
5423
                                [ 'remove_inline'       , '--.*$'  ],
5424
                            ],
5425
    'Patran Command Language'=> [   
5426
                                [ 'remove_matches'      , '^\s*#'   ], 
5427
                                [ 'remove_matches'      , '^\s*\$#' ], 
5428
                                [ 'call_regexp_common'  , 'C'       ],
5429
                            ],
5430
    'PL/I'               => [
5431
                                [ 'call_regexp_common'  , 'C'      ], 
5432
                            ],
5433
    'Perl'               => [   [ 'remove_below'        , '^__(END|DATA)__'],
5434
                                [ 'remove_matches'      , '^\s*#'  ], 
5435
                                [ 'remove_below_above'  , '^=head1', '^=cut'  ], 
5436
                                [ 'remove_inline'       , '#.*$'   ],
5437
                            ], 
5438
    'Perl/Prolog'        => [ [ 'die' ,          ], ], # never called
5439
    'Pig Latin'          => [   
5440
                                [ 'remove_matches'      , '^\s*--' ],
5441
                                [ 'remove_inline'       , '--.*$'  ],
5442
                                [ 'call_regexp_common'  , 'C'       ],
5443
                            ],
5444
    'PowerShell'         => [ 
5445
                                [ 'powershell_to_C'                ], 
5446
                                [ 'call_regexp_common'  , 'C'      ],
5447
                                [ 'remove_matches'      , '^\s*#'  ], 
5448
                                [ 'remove_inline'       , '#.*$'   ],
5449
                            ], 
5450
    'Prolog'             => [   
5451
                                [ 'remove_matches'      , '^\s*\%' ],
5452
                                [ 'call_regexp_common'  , 'C'      ],
5453
                                [ 'remove_inline'       , '(//|\%).*$' ], 
5454
                            ],
5455
    'Protocol Buffers'   => [   
5456
                                [ 'remove_matches'      , '^\s*//' ], 
5457
                                [ 'remove_inline'       , '//.*$'  ], 
5458
                                [ 'call_regexp_common'  , 'C'      ],
5459
                            ],
5460
    'Python'             => [   
5461
                                [ 'remove_matches'      , '^\s*#'  ], 
5462
                                [ 'docstring_to_C'                 ], 
5463
                                [ 'call_regexp_common'  , 'C'      ],
5464
                                [ 'remove_inline'       , '#.*$'   ],
5465
                            ], 
5466
    'PHP'                => [   
5467
                                [ 'remove_matches'      , '^\s*#'  ],
5468
                                [ 'remove_matches'      , '^\s*//' ], 
5469
                                [ 'call_regexp_common'  , 'C'      ], 
5470
                                [ 'remove_inline'       , '#.*$'   ],
5471
                                [ 'remove_inline'       , '//.*$'  ],
5472
                            ],
5473
    'QML'                => [   
5474
                                [ 'remove_matches'      , '^\s*//' ], 
5475
                                [ 'call_regexp_common'  , 'C'      ],
5476
                                [ 'remove_inline'       , '//.*$'  ], 
5477
                            ],
5478
    'Qt Project'         => [   
5479
                                [ 'remove_matches'      , '^\s*#'  ], 
5480
                                [ 'remove_inline'       , '#.*$'   ],
5481
                            ],
5482
    'R'                  => [   
5483
                                [ 'remove_matches'      , '^\s*#'  ], 
5484
                                [ 'remove_inline'       , '#.*$'   ],
5485
                            ], 
5486
    'Racket'             => [   
5487
                                [ 'remove_matches'      , '^\s*;'  ], 
5488
                                [ 'remove_inline'       , ';.*$'   ],
5489
                            ], 
5490
    'Razor'              => [
5491
                                [ 'remove_matches'      , '^\s*//' ],
5492
                                [ 'remove_between_general', '@*', '*@' ],
5493
                                [ 'call_regexp_common'  , 'C'      ], 
5494
                                [ 'remove_inline'       , '//.*$'  ],
5495
                            ], 
5496
    'RobotFramework'     => [   
5497
                                [ 'remove_matches'      , '^\s*#'   ], 
5498
                                [ 'remove_matches'      , '^\s*Comment' ], 
5499
                                [ 'remove_matches'      , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
5500
                                [ 'remove_matches'      , '^\s*\[(Documentation|Tags)\]' ],
5501
                                [ 'remove_inline'       , '#.*$'   ],
5502
                            ],
5503
    'Rexx'               => [   [ 'call_regexp_common'  , 'C'      ], ],
5504
    'Ruby'               => [   
5505
                                [ 'remove_matches'      , '^\s*#'  ], 
5506
                                [ 'remove_below_above'  , '^=begin', '^=end' ], 
5507
                                [ 'remove_inline'       , '#.*$'   ],
5508
                            ], 
5509
    'Ruby HTML'          => [   [ 'remove_html_comments',          ],
5510
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5511
    'Rust'               => [   
5512
                                [ 'remove_matches'      , '^\s*//' ], 
5513
                                [ 'remove_inline'       , '//.*$'  ], 
5514
                                [ 'call_regexp_common'  , 'C'      ],
5515
                            ],
5516
    'SAS'                => [   
5517
                                [ 'call_regexp_common'  , 'C'      ],
5518
                                [ 'remove_between_general', '*', ';' ],
5519
                            ],
5520
    'SASS'               => [   
5521
                                [ 'remove_matches'      , '^\s*//' ], 
5522
                                [ 'remove_inline'       , '//.*$'  ], 
5523
                            ],
5524
    'Scala'              => [   
5525
                                [ 'remove_matches'      , '^\s*//' ], 
5526
                                [ 'remove_inline'       , '//.*$'  ], 
5527
                                [ 'call_regexp_common'  , 'C'      ],
5528
                            ],
5529
    'SKILL'              => [   
5530
                                [ 'call_regexp_common'  , 'C'      ], 
5531
                                [ 'remove_matches'      , '^\s*;'  ],
5532
                            ],
5533
    'SKILL++'            => [   
5534
                                [ 'call_regexp_common'  , 'C'      ], 
5535
                                [ 'remove_matches'      , '^\s*;'  ],
5536
                            ],
5537
    'SQL'                => [   
5538
                                [ 'call_regexp_common'  , 'C'      ], 
5539
                                [ 'remove_matches'      , '^\s*--' ],
5540
                                [ 'remove_inline'       , '--.*$'  ],
5541
                            ],
5542
    'SQL Stored Procedure'=> [   
5543
                                [ 'call_regexp_common'  , 'C'      ], 
5544
                                [ 'remove_matches'      , '^\s*--' ],
5545
                                [ 'remove_inline'       , '--.*$'  ],
5546
                            ],
5547
    'SQL Data'           => [   
5548
                                [ 'call_regexp_common'  , 'C'      ], 
5549
                                [ 'remove_matches'      , '^\s*--' ],
5550
                                [ 'remove_inline'       , '--.*$'  ],
5551
                            ],
5552
    'sed'                => [   
5553
                                [ 'remove_matches'      , '^\s*#'  ], 
5554
                                [ 'remove_inline'       , '#.*$'   ],
5555
                            ], 
5556
    'Smarty'             => [   
5557
                                [ 'smarty_to_C'                    ], 
5558
                                [ 'call_regexp_common'  , 'C'      ],
5559
                            ], 
5560
    'Standard ML'        => [   
5561
                                [ 'remove_between_general', '(*', '*)' ],
5562
                            ], 
5563
    'Swift'              => [   
5564
                                [ 'remove_matches'      , '^\s*//' ],
5565
                                [ 'call_regexp_common'  , 'C'      ],
5566
                                [ 'remove_inline'       , '//.*$'  ],
5567
                            ],
5568
 
5569
    'm4'                 => [   [ 'remove_matches'      , '^dnl '  ], ], 
5570
    'C Shell'            => [   
5571
                                [ 'remove_matches'      , '^\s*#'  ], 
5572
                                [ 'remove_inline'       , '#.*$'   ],
5573
                            ], 
5574
    'Kermit'             => [  
5575
                                [ 'remove_matches'      , '^\s*#'  ], 
5576
                                [ 'remove_matches'      , '^\s*;'  ], 
5577
                                [ 'remove_inline'       , '#.*$'   ],
5578
                            ], 
5579
    'Korn Shell'         => [   
5580
                                [ 'remove_matches'      , '^\s*#'  ], 
5581
                                [ 'remove_inline'       , '#.*$'   ],
5582
                            ], 
5583
    'Tcl/Tk'             => [   
5584
                                [ 'remove_matches'      , '^\s*#'  ], 
5585
                                [ 'remove_inline'       , '#.*$'   ],
5586
                            ], 
5587
    'Teamcenter met'     => [   [ 'call_regexp_common'  , 'C'      ], ],
5588
    'Teamcenter mth'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
5589
    'Titanium Style Sheet'  => [
5590
                                [ 'remove_matches'      , '^\s*//' ], 
5591
                                [ 'remove_inline'       , '//.*$'  ], 
5592
                                [ 'remove_between_regex', '/[^/]', '[^/]/' ],
5593
                            ],
5594
    'TypeScript'         => [   
5595
                                [ 'remove_matches'      , '^\s*//' ], 
5596
                                [ 'remove_inline'       , '//.*$'  ], 
5597
                                [ 'call_regexp_common'  , 'C'      ],
5598
                            ],
5599
    'Unity-Prefab'       => [   
5600
                                [ 'remove_matches'      , '^\s*#'  ], 
5601
                                [ 'remove_inline'       , '#.*$'   ], 
5602
                            ],
5603
    'Visual Fox Pro'     =>  [
5604
                                [ 'remove_matches'      , '^\s*\*' ],
5605
                                [ 'remove_inline'       , '\*.*$'  ],
5606
                                [ 'remove_matches'      , '^\s*&&' ],
5607
                                [ 'remove_inline'       , '&&.*$'  ],
5608
                            ],
5609
    'Softbridge Basic'   => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
5610
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
5611
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5612
    # http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
5613
    'Vala'               => [   
5614
                                [ 'remove_matches'      , '^\s*//' ], 
5615
                                [ 'call_regexp_common'  , 'C'      ],
5616
                                [ 'remove_inline'       , '//.*$'  ], 
5617
                            ],
5618
    'Vala Header'        => [   
5619
                                [ 'remove_matches'      , '^\s*//' ], 
5620
                                [ 'call_regexp_common'  , 'C'      ],
5621
                                [ 'remove_inline'       , '//.*$'  ], 
5622
                            ],
5623
    'Verilog-SystemVerilog' => [
5624
                                [ 'remove_matches'      , '^\s*//' ], 
5625
                                [ 'remove_inline'       , '//.*$'  ], 
5626
                                [ 'call_regexp_common'  , 'C'      ],
5627
                            ],
5628
    'VHDL'               => [   
5629
                                [ 'remove_matches'      , '^\s*--' ],
5630
                                [ 'remove_matches'      , '^\s*//' ], 
5631
                                [ 'call_regexp_common'  , 'C'      ], 
5632
                                [ 'remove_inline'       , '--.*$'  ],
5633
                                [ 'remove_inline'       , '//.*$'  ], 
5634
                            ],
5635
    'vim script'         => [   
5636
                                [ 'remove_matches'      , '^\s*"'  ], 
5637
                                [ 'remove_inline'       , '".*$'   ], 
5638
                            ],
5639
    'Visual Basic'       => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
5640
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
5641
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5642
    'Visualforce Component' => [
5643
                                [ 'remove_html_comments',          ],
5644
                                [ 'call_regexp_common'  , 'HTML'   ], 
5645
                            ],
5646
    'Visualforce Page'   => [
5647
                                [ 'remove_html_comments',          ],
5648
                                [ 'call_regexp_common'  , 'HTML'   ], 
5649
                            ],
5650
    'Velocity Template Language' => [
5651
                                [ 'remove_html_comments',          ],
5652
                                [ 'call_regexp_common'  , 'HTML'   ],
5653
                                [ 'remove_jsp_comments' ,          ], 
5654
                                [ 'remove_matches'      , '^\s*//' ],
5655
                                [ 'add_newlines'        ,          ],
5656
                                [ 'call_regexp_common'  , 'C'      ],
5657
                            ],                            
5658
    'Teamcenter def'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
5659
    'Windows Module Definition' => [
5660
                                [ 'remove_matches'      , '^\s*;' ],
5661
                                [ 'remove_inline'       , ';.*$'  ], 
5662
                            ],                            
5663
    'yacc'               => [   
5664
                                [ 'call_regexp_common'  , 'C'      ], 
5665
                                [ 'remove_matches'      , '^\s*//' ], 
5666
                                [ 'remove_inline'       , '//.*$'  ], 
5667
                            ],
5668
    'YAML'               => [   
5669
                                [ 'remove_matches'      , '^\s*#'  ], 
5670
                                [ 'remove_inline'       , '#.*$'   ], 
5671
                            ],
5672
    'lex'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5673
    'XAML'               => [   [ 'remove_html_comments',          ],
5674
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5675
    'xBase Header'       => [
5676
                                [ 'remove_matches'      , '^\s*//' ], 
5677
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5678
                                [ 'remove_matches'      , '^\s*\*' ], 
5679
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5680
                                [ 'remove_matches'      , '^\s*note' ], 
5681
                                [ 'remove_matches'      , '^\s*Note' ], 
5682
                                [ 'remove_inline'       , '//.*$'  ], 
5683
                                [ 'remove_inline'       , '\&\&.*$' ], 
5684
                                [ 'call_regexp_common'  , 'C'      ],
5685
                            ],
5686
    'xBase'              => [
5687
                                [ 'remove_matches'      , '^\s*//' ], 
5688
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5689
                                [ 'remove_matches'      , '^\s*\*' ], 
5690
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5691
                                [ 'remove_matches'      , '^\s*note' ], 
5692
                                [ 'remove_matches'      , '^\s*Note' ], 
5693
                                [ 'remove_inline'       , '//.*$'  ], 
5694
                                [ 'remove_inline'       , '\&\&.*$' ], 
5695
                                [ 'call_regexp_common'  , 'C'      ],
5696
                            ],
5697
    'MXML'               => [   
5698
                                [ 'remove_html_comments',          ],
5699
                                [ 'call_regexp_common'  , 'HTML'   ], 
5700
                                [ 'remove_matches'      , '^\s*//' ], 
5701
                                [ 'add_newlines'        ,          ], 
5702
                                [ 'call_regexp_common'  , 'C'      ], 
5703
                            ],
5704
    'Windows Message File'  => [
5705
                                [ 'remove_matches'      , '^\s*;\s*//' ], 
5706
                                [ 'call_regexp_common'  , 'C'          ], 
5707
                                [ 'remove_matches'      , '^\s*;\s*$'  ], 
5708
#                               next line only hypothetical
5709
#                               [ 'remove_matches_2re'  , '^\s*;\s*/\*',
5710
#                                                         '^\s*;\s*\*/', ],
5711
                            ],
5712
    'Windows Resource File' => [
5713
                                [ 'remove_matches'      , '^\s*//' ], 
5714
                                [ 'remove_inline'       , '//.*$'  ], 
5715
                                [ 'call_regexp_common'  , 'C'      ],
5716
                            ],
5717
    'WiX source'         => [
5718
                                [ 'remove_html_comments',          ],
5719
                                [ 'call_regexp_common'  , 'HTML'   ],
5720
                            ],
5721
    'WiX include'        => [
5722
                                [ 'remove_html_comments',          ],
5723
                                [ 'call_regexp_common'  , 'HTML'   ],
5724
                            ],
5725
    'WiX string localization' => [
5726
                                [ 'remove_html_comments',          ],
5727
                                [ 'call_regexp_common'  , 'HTML'   ],
5728
                            ],
5729
    'XML'                => [   
5730
                                [ 'remove_html_comments',          ],
5731
                                [ 'call_regexp_common'  , 'HTML'   ], 
5732
                            ],
5733
    'XQuery'             => [
5734
                                [ 'remove_between_general', '(:', ':)' ],
5735
                            ],
5736
    'XSD'                => [   [ 'remove_html_comments',          ],
5737
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5738
    'XSLT'               => [   [ 'remove_html_comments',          ],
5739
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5740
    'NAnt script'       => [   [ 'remove_html_comments',          ],
5741
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5742
    'MSBuild script'    => [   [ 'remove_html_comments',          ],
5743
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5744
    );
5745
# 1}}}
5746
%{$rh_EOL_continuation_re} = (               # {{{1
5747
    'ActionScript'       =>     '\\\\$'         ,
5748
    'Assembly'           =>     '\\\\$'         ,
5749
    'ASP'                =>     '\\\\$'         ,
5750
    'ASP.Net'            =>     '\\\\$'         ,
5751
    'Ada'                =>     '\\\\$'         ,
5752
    'awk'                =>     '\\\\$'         ,
5753
    'bc'                 =>     '\\\\$'         ,
5754
    'C'                  =>     '\\\\$'         ,
5755
    'C++'                =>     '\\\\$'         ,
5756
    'C/C++ Header'       =>     '\\\\$'         ,
5757
    'CMake'              =>     '\\\\$'         ,
5758
    'Cython'             =>     '\\\\$'         ,
5759
    'C#'                 =>     '\\\\$'         ,
5760
    'D'                  =>     '\\\\$'         ,
5761
    'Dart'               =>     '\\\\$'         ,
5762
    'Expect'             =>     '\\\\$'         ,
5763
    'Go'                 =>     '\\\\$'         ,
5764
    'IDL'                =>     '\$\\$'         ,
5765
    'Java'               =>     '\\\\$'         ,
5766
    'Javascript'         =>     '\\\\$'         ,
5767
    'LESS'               =>     '\\\\$'         ,
5768
    'Lua'                =>     '\\\\$'         ,
5769
    'make'               =>     '\\\\$'         ,
5770
    'MATLAB'             =>     '\.\.\.\s*$'    ,
5771
    'MXML'               =>     '\\\\$'         ,
5772
    'Objective C'        =>     '\\\\$'         ,
5773
    'Objective C++'      =>     '\\\\$'         ,
5774
    'OCaml'              =>     '\\\\$'         ,
5775
    'Octave'             =>     '\.\.\.\s*$'    ,
5776
    'Qt Project'         =>     '\\\\$'         ,
5777
    'Patran Command Language'=> '\\\\$'         ,
5778
    'PowerShell'         =>     '\\\\$'         ,
5779
    'Python'             =>     '\\\\$'         ,
5780
    'R'                  =>     '\\\\$'         ,
5781
    'Ruby'               =>     '\\\\$'         ,
5782
    'sed'                =>     '\\\\$'         ,
5783
    'Swift'              =>     '\\\\$'         ,
5784
    'Bourne Again Shell' =>     '\\\\$'         ,
5785
    'Bourne Shell'       =>     '\\\\$'         ,
5786
    'C Shell'            =>     '\\\\$'         ,
5787
    'Kermit'             =>     '\\\\$'         ,
5788
    'Korn Shell'         =>     '\\\\$'         ,
5789
    'Tcl/Tk'             =>     '\\\\$'         ,
5790
    'TypeScript'         =>     '\\\\$'         ,
5791
    'lex'                =>     '\\\\$'         ,
5792
    'Vala'               =>     '\\\\$'         ,
5793
    'Vala Header'        =>     '\\\\$'         ,
5794
    );
5795
# 1}}}
5796
%{$rh_Not_Code_Extension}    = (             # {{{1
5797
   '1'       => 1,  # Man pages (documentation):
5798
   '2'       => 1,
5799
   '3'       => 1,
5800
   '4'       => 1,
5801
   '5'       => 1,
5802
   '6'       => 1,
5803
   '7'       => 1,
5804
   '8'       => 1,
5805
   '9'       => 1,
5806
   'a'       => 1,  # Static object code.
5807
   'ad'      => 1,  # X application default resource file.
5808
   'afm'     => 1,  # font metrics
5809
   'arc'     => 1,  # arc(1) archive
5810
   'arj'     => 1,  # arj(1) archive
5811
   'au'      => 1,  # Audio sound filearj(1) archive
5812
   'bak'     => 1,  # Backup files - we only want to count the "real" files.
5813
   'bdf'     => 1,
5814
   'bmp'     => 1,
5815
   'bz2'     => 1,  # bzip2(1) compressed file
5816
   'csv'     => 1,  # comma separated values
5817
   'desktop' => 1,
5818
   'dic'     => 1,
5819
   'doc'     => 1,
5820
   'elc'     => 1,
5821
   'eps'     => 1,
5822
   'fig'     => 1,
5823
   'gif'     => 1,
5824
   'gz'      => 1,
5825
   'hdf'     => 1,  # hierarchical data format
5826
   'in'      => 1,  # Debatable.
5827
   'jpg'     => 1,
5828
   'kdelnk'  => 1,
5829
   'man'     => 1,
5830
   'mf'      => 1,
5831
   'mp3'     => 1,
5832
   'n'       => 1,
5833
   'o'       => 1,  # Object code is generated from source code.
5834
   'pbm'     => 1,
5835
   'pdf'     => 1,
5836
   'pfb'     => 1,
5837
   'png'     => 1,
5838
   'po'      => 1,
5839
   'ps'      => 1,  # Postscript is _USUALLY_ generated automatically.
5840
   'sgm'     => 1,
5841
   'sgml'    => 1,
5842
   'so'      => 1,  # Dynamically-loaded object code.
5843
   'Tag'     => 1,
5844
   'tex'     => 1,
5845
   'text'    => 1,
5846
   'tfm'     => 1,
5847
   'tgz'     => 1,  # gzipped tarball
5848
   'tiff'    => 1,
5849
   'txt'     => 1, 
5850
   'vf'      => 1,
5851
   'wav'     => 1,
5852
   'xbm'     => 1,
5853
   'xpm'     => 1,
5854
   'Y'       => 1,  # file compressed with "Yabba"
5855
   'Z'       => 1,  # file compressed with "compress"
5856
   'zip'     => 1,  # zip archive
5857
); # 1}}}
5858
%{$rh_Not_Code_Filename}     = (             # {{{1
5859
   'AUTHORS'     => 1,
5860
   'BUGS'        => 1,
5861
   'BUGS'        => 1,
5862
   'Changelog'   => 1,
5863
   'ChangeLog'   => 1,
5864
   'ChangeLog'   => 1,
5865
   'Changes'     => 1,
5866
   'CHANGES'     => 1,
5867
   'COPYING'     => 1,
5868
   'COPYING'     => 1,
5869
   '.cvsignore'  => 1,
5870
   'Entries'     => 1,
5871
   'FAQ'         => 1,
5872
   'iconfig.h'   => 1, # Skip "iconfig.h" files; they're used in Imakefiles.
5873
   'INSTALL'     => 1,
5874
   'MAINTAINERS' => 1,
5875
   'MD5SUMS'     => 1,
5876
   'NEWS'        => 1,
5877
   'readme'      => 1,
5878
   'Readme'      => 1,
5879
   'README'      => 1,
5880
   'README.tk'   => 1, # used in kdemultimedia, it's confusing.
5881
   'Repository'  => 1,
5882
   'Root'        => 1, # CVS
5883
   'TODO'        => 1,
5884
);
5885
# 1}}}
5886
%{$rh_Scale_Factor}          = (             # {{{1
5887
    '(unknown)'                    =>   0.00,
5888
    '1032/af'                      =>   5.00,
5889
    '1st generation default'       =>   0.25,
5890
    '2nd generation default'       =>   0.75,
5891
    '3rd generation default'       =>   1.00,
5892
    '4th generation default'       =>   4.00,
5893
    '5th generation default'       =>  16.00,
5894
    'aas macro'                    =>   0.88,
5895
    'abap/4'                       =>   5.00,
5896
    'ABAP'                         =>   5.00,
5897
    'accel'                        =>   4.21,
5898
    'access'                       =>   2.11,
5899
    'ActionScript'                 =>   1.36,
5900
    'actor'                        =>   3.81,
5901
    'acumen'                       =>   2.86,
5902
    'Ada'                          =>   0.52,
5903
    'Ada 83'                       =>   1.13,
5904
    'Ada 95'                       =>   1.63,
5905
    'adr/dl'                       =>   2.00,
5906
    'adr/ideal/pdl'                =>   4.00,
5907
    'ads/batch'                    =>   4.00,
5908
    'ads/online'                   =>   4.00,
5909
    'ADSO/IDSM'                    =>   3.00,
5910
    'advantage'                    =>   2.11,
5911
    'ai shell default'             =>   1.63,
5912
    'ai shells'                    =>   1.63,
5913
    'algol 68'                     =>   0.75,
5914
    'algol w'                      =>   0.75,
5915
    'ambush'                       =>   2.50,
5916
    'aml'                          =>   1.63,
5917
    'AMPLE'                        =>   2.00,
5918
    'Ant/XML'                      =>   1.90,
5919
    'Ant'                          =>   1.90,
5920
    'amppl ii'                     =>   1.25,
5921
    'ansi basic'                   =>   1.25,
5922
    'ansi cobol 74'                =>   0.75,
5923
    'ansi cobol 85'                =>   0.88,
5924
    'SQL'                          =>   6.15,
5925
    'SQL Stored Procedure'         =>   6.15,
5926
    'SQL Data'                     =>   1.00,
5927
    'answer/db'                    =>   6.15,
5928
    'apl 360/370'                  =>   2.50,
5929
    'apl default'                  =>   2.50,
5930
    'apl*plus'                     =>   2.50,
5931
    'applesoft basic'              =>   0.63,
5932
    'application builder'          =>   4.00,
5933
    'application manager'          =>   2.22,
5934
    'aps'                          =>   0.96,
5935
    'aps'                          =>   4.71,
5936
    'apt'                          =>   1.13,
5937
    'aptools'                      =>   4.00,
5938
    'arc'                          =>   1.63,
5939
    'ariel'                        =>   0.75,
5940
    'arity'                        =>   1.63,
5941
    'art'                          =>   1.63,
5942
    'art enterprise'               =>   1.74,
5943
    'artemis'                      =>   2.00,
5944
    'artim'                        =>   1.74,
5945
    'as/set'                       =>   4.21,
5946
    'asi/inquiry'                  =>   6.15,
5947
    'ask windows'                  =>   1.74,
5948
    'asa'                          =>   1.29,
5949
    'ASP'                          =>   1.29,
5950
    'ASP.Net'                      =>   1.29,
5951
    'aspx'                         =>   1.29,
5952
    'asax'                         =>   1.29,
5953
    'ascx'                         =>   1.29,
5954
    'asmx'                         =>   1.29,
5955
    'config'                       =>   1.29,
5956
    'webinfo'                      =>   1.29,
5957
    'CCS'                          =>   5.33,
5958
    'Apex Trigger'                 =>   1.4 ,
5959
    'Arduino Sketch'               =>   1.00,
5960
    'Assembly'                     =>   0.25,
5961
    'Assembly (macro)'             =>   0.51,
5962
    'associative default'          =>   1.25,
5963
    'autocoder'                    =>   0.25,
5964
    'AutoHotkey'                   =>   1.29,
5965
    'awk'                          =>   3.81,
5966
    'aztec c'                      =>   0.63,
5967
    'balm'                         =>   0.75,
5968
    'base sas'                     =>   1.51,
5969
    'basic'                        =>   0.75,
5970
    'basic a'                      =>   0.63,
5971
    'bc'                           =>   1.50,
5972
    'berkeley pascal'              =>   0.88,
5973
    'better basic'                 =>   0.88,
5974
    'bliss'                        =>   0.75,
5975
    'bmsgen'                       =>   2.22,
5976
    'boeingcalc'                   =>  13.33,
5977
    'bteq'                         =>   6.15,
5978
    'C'                            =>   0.77,
5979
    'c set 2'                      =>   0.88,
5980
    'C#'                           =>   1.36,
5981
    'C++'                          =>   1.51,
5982
    'c86plus'                      =>   0.63,
5983
    'cadbfast'                     =>   2.00,
5984
    'caearl'                       =>   2.86,
5985
    'cast'                         =>   1.63,
5986
    'cbasic'                       =>   0.88,
5987
    'cdadl'                        =>   4.00,
5988
    'cellsim'                      =>   1.74,
5989
    'ColdFusion'                   =>   4.00,
5990
    'ColdFusion CFScript'          =>   4.00,
5991
    'chili'                        =>   0.75,
5992
    'chill'                        =>   0.75,
5993
    'cics'                         =>   1.74,
5994
    'clarion'                      =>   1.38,
5995
    'clascal'                      =>   1.00,
5996
    'cli'                          =>   2.50,
5997
    'clipper'                      =>   2.05,
5998
    'clipper db'                   =>   2.00,
5999
    'clos'                         =>   3.81,
6000
    'Clojure'                      =>   1.25,
6001
    'ClojureScript'                =>   1.25,
6002
    'clout'                        =>   2.00,
6003
    'CMake'                        =>   1.00,
6004
    'cms2'                         =>   0.75,
6005
    'cmsgen'                       =>   4.21,
6006
    'COBOL'                        =>   1.04,
6007
    'COBOL ii'                     =>   0.75,
6008
    'COBOL/400'                    =>   0.88,
6009
    'cobra'                        =>   4.00,
6010
    'codecenter'                   =>   2.22,
6011
    'cofac'                        =>   2.22,
6012
    'CoffeeScript'                 =>   2.00,
6013
    'cogen'                        =>   2.22,
6014
    'cognos'                       =>   2.22,
6015
    'cogo'                         =>   1.13,
6016
    'comal'                        =>   1.00,
6017
    'comit ii'                     =>   1.25,
6018
    'common lisp'                  =>   1.25,
6019
    'concurrent pascal'            =>   1.00,
6020
    'conniver'                     =>   1.25,
6021
    'cool:gen/ief'                 =>   2.58,
6022
    'coral 66'                     =>   0.75,
6023
    'corvet'                       =>   4.21,
6024
    'corvision'                    =>   5.33,
6025
    'cpl'                          =>   0.50,
6026
    'Crystal Reports'              =>   4.00,
6027
    'csl'                          =>   1.63,
6028
    'csp'                          =>   1.51,
6029
    'cssl'                         =>   1.74,
6030
    'CSS'                          => 1.0,
6031
    'culprit'                      =>   1.57,
6032
    'CUDA'                         =>   1.00,
6033
    'cxpert'                       =>   1.63,
6034
    'cygnet'                       =>   4.21,
6035
    'D'                            =>   1.70,
6036
    'DAL'                          =>   1.50,
6037
    'Dart'                         =>   2.00,
6038
    'data base default'            =>   2.00,
6039
    'dataflex'                     =>   2.00,
6040
    'datatrieve'                   =>   4.00,
6041
    'dbase iii'                    =>   2.00,
6042
    'dbase iv'                     =>   1.54,
6043
    'dcl'                          =>   0.38,
6044
    'diff'                         =>   1.00,
6045
    'decision support default'     =>   2.22,
6046
    'decrally'                     =>   2.00,
6047
    'delphi'                       =>   2.76,
6048
    'DITA'                         =>   1.90,
6049
    'dl/1'                         =>   2.00,
6050
    'dtrace'                       =>   2.00,
6051
    'NASTRAN DMAP'                 =>   2.35,
6052
    'dna4'                         =>   4.21,
6053
    'DOORS Extension Language'     =>   1.50,
6054
    'DOS Batch'                    =>   0.63,
6055
    'dsp assembly'                 =>   0.50,
6056
    'dtabl'                        =>   1.74,
6057
    'dtipt'                        =>   1.74,
6058
    'dyana'                        =>   1.13,
6059
    'dynamoiii'                    =>   1.74,
6060
    'easel'                        =>   2.76,
6061
    'easy'                         =>   1.63,
6062
    'easytrieve+'                  =>   2.35,
6063
    'eclipse'                      =>   1.63,
6064
    'ECPP'                         =>   1.90,
6065
    'eda/sql'                      =>   6.67,
6066
    'edscheme 3.4'                 =>   1.51,
6067
    'eiffel'                       =>   3.81,
6068
    'Elixir'                       =>   2.11,
6069
    'enform'                       =>   1.74,
6070
    'englishbased default'         =>   1.51,
6071
    'ensemble'                     =>   2.76,
6072
    'epos'                         =>   4.00,
6073
    'ERB'                          =>   2.00,
6074
    'Erlang'                       =>   2.11,
6075
    'esf'                          =>   2.00,
6076
    'espadvisor'                   =>   1.63,
6077
    'espl/i'                       =>   1.13,
6078
    'euclid'                       =>   0.75,
6079
    'excel'                        =>   1.74,
6080
    'excel 12'                     =>  13.33,
6081
    'excel 34'                     =>  13.33,
6082
    'excel 5'                      =>  13.33,
6083
    'express'                      =>   2.22,
6084
    'exsys'                        =>   1.63,
6085
    'extended common lisp'         =>   1.43,
6086
    'eznomad'                      =>   2.22,
6087
    'facets'                       =>   4.00,
6088
    'factorylink iv'               =>   2.76,
6089
    'fame'                         =>   2.22,
6090
    'filemaker pro'                =>   2.22,
6091
    'flavors'                      =>   2.76,
6092
    'flex'                         =>   1.74,
6093
    'flexgen'                      =>   2.76,
6094
    'Focus'                        =>   1.90,
6095
    'foil'                         =>   1.51,
6096
    'forte'                        =>   4.44,
6097
    'forth'                        =>   1.25,
6098
    'Fortran 66'                   =>   0.63,
6099
    'Fortran 77'                   =>   0.75,
6100
    'Fortran 90'                   =>   1.00,
6101
    'Fortran 95'                   =>   1.13,
6102
    'Fortran II'                   =>   0.63,
6103
    'foundation'                   =>   2.76,
6104
    'foxpro'                       =>   2.29,
6105
    'foxpro 1'                     =>   2.00,
6106
    'foxpro 2.5'                   =>   2.35,
6107
    'framework'                    =>  13.33,
6108
    'F#'                           =>   2.50,
6109
    'g2'                           =>   1.63,
6110
    'gamma'                        =>   5.00,
6111
    'genascript'                   =>   2.96,
6112
    'gener/ol'                     =>   6.15,
6113
    'genexus'                      =>   5.33,
6114
    'genifer'                      =>   4.21,
6115
    'geode 2.0'                    =>   5.00,
6116
    'gfa basic'                    =>   2.35,
6117
    'gml'                          =>   1.74,
6118
    'golden common lisp'           =>   1.25,
6119
    'gpss'                         =>   1.74,
6120
    'guest'                        =>   2.86,
6121
    'guru'                         =>   1.63,
6122
    'Go'                           =>   2.50,
6123
    'Grails'                       =>   1.48,
6124
    'Groovy'                       =>   4.10,
6125
    'gw basic'                     =>   0.82,
6126
    'Harbour'                      =>   2.00,
6127
    'Haskell'                      =>   2.11,
6128
    'high c'                       =>   0.63,
6129
    'hlevel'                       =>   1.38,
6130
    'hp basic'                     =>   0.63,
6131
    'Haml'                         =>   2.50,
6132
    'Handlebars'                   =>   2.50,
6133
    'HTML'                         =>   1.90,
6134
    'XML'                          =>   1.90,
6135
    'MXML'                         =>   1.90,
6136
    'XSLT'                         =>   1.90,
6137
    'DTD'                          =>   1.90,
6138
    'XSD'                          =>   1.90,
6139
    'NAnt script'                  =>   1.90,
6140
    'MSBuild script'               =>   1.90, 
6141
    'HLSL'                         =>   2.00,
6142
    'HTML 2'                       =>   5.00,
6143
    'HTML 3'                       =>   5.33,
6144
    'huron'                        =>   5.00,
6145
    'ibm adf i'                    =>   4.00,
6146
    'ibm adf ii'                   =>   4.44,
6147
    'ibm advanced basic'           =>   0.82,
6148
    'ibm cics/vs'                  =>   2.00,
6149
    'ibm compiled basic'           =>   0.88,
6150
    'ibm vs cobol'                 =>   0.75,
6151
    'ibm vs cobol ii'              =>   0.88,
6152
    'ices'                         =>   1.13,
6153
    'icon'                         =>   1.00,
6154
    'ideal'                        =>   1.54,
6155
    'idms'                         =>   2.00,
6156
    'ief'                          =>   5.71,
6157
    'ief/cool:gen'                 =>   2.58,
6158
    'iew'                          =>   5.71,
6159
    'ifps/plus'                    =>   2.50,
6160
    'imprs'                        =>   2.00,
6161
    'informix'                     =>   2.58,
6162
    'ingres'                       =>   2.00,
6163
    'inquire'                      =>   6.15,
6164
    'insight2'                     =>   1.63,
6165
    'install/1'                    =>   5.00,
6166
    'InstallShield'                =>   1.90,
6167
    'intellect'                    =>   1.51,
6168
    'interlisp'                    =>   1.38,
6169
    'interpreted basic'            =>   0.75,
6170
    'interpreted c'                =>   0.63,
6171
    'iqlisp'                       =>   1.38,
6172
    'iqrp'                         =>   6.15,
6173
    'j2ee'                         =>   1.60,
6174
    'janus'                        =>   1.13,
6175
    'Java'                         =>   1.36,
6176
    'Javascript'                   =>   1.48,
6177
    'JavaServer Faces'             =>   1.5 ,
6178
    'JSON'                         =>   2.50,
6179
    'JSP'                          =>   1.48,
6180
    'Velocity Template Language'   =>   1.00,
6181
    'JCL'                          =>   1.67,
6182
    'joss'                         =>   0.75,
6183
    'jovial'                       =>   0.75,
6184
    'jsp'                          =>   1.36,
6185
    'kappa'                        =>   2.00,
6186
    'kbms'                         =>   1.63,
6187
    'kcl'                          =>   1.25,
6188
    'kee'                          =>   1.63,
6189
    'keyplus'                      =>   2.00,
6190
    'kl'                           =>   1.25,
6191
    'klo'                          =>   1.25,
6192
    'knowol'                       =>   1.63,
6193
    'krl'                          =>   1.38,
6194
    'Kermit'                       =>   2.00,
6195
    'Korn Shell'                   =>   3.81,
6196
    'Kotlin'                       =>   2.00,
6197
    'ladder logic'                 =>   2.22,
6198
    'lambit/l'                     =>   1.25,
6199
    'lattice c'                    =>   0.63,
6200
    'LESS'                         =>   1.50,
6201
    'liana'                        =>   0.63,
6202
    'lilith'                       =>   1.13,
6203
    'linc ii'                      =>   5.71,
6204
    'Lisp'                         =>   1.25,
6205
    'LiveLink OScript'             =>   3.5 ,
6206
    'loglisp'                      =>   1.38,
6207
    'loops'                        =>   3.81,
6208
    'lotus 123 dos'                =>  13.33,
6209
    'lotus macros'                 =>   0.75,
6210
    'lotus notes'                  =>   3.64,
6211
    'lucid 3d'                     =>  13.33,
6212
    'lyric'                        =>   1.51,
6213
    'm4'                           =>   1.00,
6214
    'm'                            =>   5.00,
6215
    'macforth'                     =>   1.25,
6216
    'mach1'                        =>   2.00,
6217
    'machine language'             =>   0.13,
6218
    'maestro'                      =>   5.00,
6219
    'magec'                        =>   5.00,
6220
    'magik'                        =>   3.81,
6221
    'Lake'                         =>   3.81,
6222
    'make'                         =>   2.50,
6223
    'mantis'                       =>   2.96,
6224
    'mapper'                       =>   0.99,
6225
    'mark iv'                      =>   2.00,
6226
    'mark v'                       =>   2.22,
6227
    'mathcad'                      =>  16.00,
6228
    'Maven'                        =>   1.90,
6229
    'mdl'                          =>   2.22,
6230
    'mentor'                       =>   1.51,
6231
    'mesa'                         =>   0.75,
6232
    'microfocus cobol'             =>   1.00,
6233
    'microforth'                   =>   1.25,
6234
    'microsoft c'                  =>   0.63,
6235
    'microstep'                    =>   4.00,
6236
    'miranda'                      =>   2.00,
6237
    'model 204'                    =>   2.11,
6238
    'modula 2'                     =>   1.00,
6239
    'mosaic'                       =>  13.33,
6240
    # 'ms c ++ v. 7'                 =>   1.51,
6241
    'ms compiled basic'            =>   0.88,
6242
    'msl'                          =>   1.25,
6243
    'mulisp'                       =>   1.25,
6244
    'MUMPS'                        =>   4.21,
6245
    'Mustache'                     =>   1.75,
6246
    'Nastran'                      =>   1.13,
6247
    'natural'                      =>   1.54,
6248
    'natural 1'                    =>   1.51,
6249
    'natural 2'                    =>   1.74,
6250
    'natural construct'            =>   3.20,
6251
    'natural language'             =>   0.03,
6252
    'netron/cap'                   =>   4.21,
6253
    'nexpert'                      =>   1.63,
6254
    'nial'                         =>   1.63,
6255
    'nomad2'                       =>   2.00,
6256
    'nonprocedural default'        =>   2.22,
6257
    'notes vip'                    =>   2.22,
6258
    'nroff'                        =>   1.51,
6259
    'object assembler'             =>   1.25,
6260
    'object lisp'                  =>   2.76,
6261
    'object logo'                  =>   2.76,
6262
    'object pascal'                =>   2.76,
6263
    'object star'                  =>   5.00,
6264
    'Objective C'                  =>   2.96,
6265
    'Objective C++'                =>   2.96,
6266
    'objectoriented default'       =>   2.76,
6267
    'objectview'                   =>   3.20,
6268
    'OCaml'                        =>   3.00,
6269
    'ogl'                          =>   1.00,
6270
    'omnis 7'                      =>   2.00,
6271
    'oodl'                         =>   2.76,
6272
    'ops'                          =>   1.74,
6273
    'ops5'                         =>   1.38,
6274
    'oracle'                       =>   2.76,
6275
    'Oracle Reports'               =>   2.76,
6276
    'Oracle Forms'                 =>   2.67,
6277
    'Oracle Developer/2000'        =>   3.48,
6278
    'oscar'                        =>   0.75,
6279
    'pacbase'                      =>   1.67,
6280
    'pace'                         =>   2.00,
6281
    'paradox/pal'                  =>   2.22,
6282
    'Pascal'                       =>   0.88,
6283
    'Patran Command Language'      =>   2.50,
6284
    'pc focus'                     =>   2.22,
6285
    'pdl millenium'                =>   3.81,
6286
    'pdp11 ade'                    =>   1.51,
6287
    'peoplesoft'                   =>   2.50,
6288
    'Perl'                         =>   4.00,
6289
    'persistance object builder'   =>   3.81,
6290
    'Pig Latin'                    =>   1.00,
6291
    'pilot'                        =>   1.51,
6292
    'PL/I'                         =>   1.38,
6293
    'pl/1'                         =>   1.38,
6294
    'pl/m'                         =>   1.13,
6295
    'pl/s'                         =>   0.88,
6296
    'pl/sql'                       =>   2.58,
6297
    'planit'                       =>   1.51,
6298
    'planner'                      =>   1.25,
6299
    'planperfect 1'                =>  11.43,
6300
    'plato'                        =>   1.51,
6301
    'polyforth'                    =>   1.25,
6302
    'pop'                          =>   1.38,
6303
    'poplog'                       =>   1.38,
6304
    'power basic'                  =>   1.63,
6305
    'powerbuilder'                 =>   3.33,
6306
    'powerhouse'                   =>   5.71,
6307
    'PowerShell'                   =>   3.00,
6308
    'ppl (plus)'                   =>   2.00,
6309
    'problemoriented default'      =>   1.13,
6310
    'proc'                         =>   2.96,
6311
    'procedural default'           =>   0.75,
6312
    'professional pascal'          =>   0.88,
6313
    'program generator default'    =>   5.00,
6314
    'progress v4'                  =>   2.22,
6315
    'proiv'                        =>   1.38,
6316
    'Prolog'                       =>   1.25,
6317
    'prose'                        =>   0.75,
6318
    'proteus'                      =>   0.75,
6319
    'Protocol Buffers'             =>   2.00,
6320
    'Puppet'                       =>   2.00,
6321
    'PureScript'                   =>   2.00,
6322
    'qbasic'                       =>   1.38,
6323
    'qbe'                          =>   6.15,
6324
    'qmf'                          =>   5.33,
6325
    'QML'                          =>   1.25,
6326
    'Qt Project'                   =>   1.00,
6327
    'qnial'                        =>   1.63,
6328
    'quattro'                      =>  13.33,
6329
    'quattro pro'                  =>  13.33,
6330
    'query default'                =>   6.15,
6331
    'quick basic 1'                =>   1.25,
6332
    'quick basic 2'                =>   1.31,
6333
    'quick basic 3'                =>   1.38,
6334
    'quick c'                      =>   0.63,
6335
    'quickbuild'                   =>   2.86,
6336
    'quiz'                         =>   5.33,
6337
    'R'                            =>   3.00,
6338
    'Racket'                       =>   1.50,
6339
    'rally'                        =>   2.00,
6340
    'ramis ii'                     =>   2.00,
6341
    'rapidgen'                     =>   2.86,
6342
    'ratfor'                       =>   0.88,
6343
    'rdb'                          =>   2.00,
6344
    'realia'                       =>   1.74,
6345
    'realizer 1.0'                 =>   2.00,
6346
    'realizer 2.0'                 =>   2.22,
6347
    'relate/3000'                  =>   2.00,
6348
    'reuse default'                =>  16.00,
6349
    'Razor'                        =>   2.00,
6350
    'Rexx'                         =>   1.19,
6351
    'rm basic'                     =>   0.88,
6352
    'rm cobol'                     =>   0.75,
6353
    'rm fortran'                   =>   0.75,
6354
    'RobotFramework'               =>   2.50,
6355
    'rpg i'                        =>   1.00,
6356
    'rpg ii'                       =>   1.63,
6357
    'rpg iii'                      =>   1.63,
6358
    'rtexpert 1.4'                 =>   1.38,
6359
    'Rust'                         =>   1.00,
6360
    'sabretalk'                    =>   0.90,
6361
    'sail'                         =>   0.75,
6362
    'sapiens'                      =>   5.00,
6363
    'sas'                          =>   1.95,
6364
    'savvy'                        =>   6.15,
6365
    'sbasic'                       =>   0.88,
6366
    'Scala'                        =>   4.10,
6367
    'sceptre'                      =>   1.13,
6368
    'scheme'                       =>   1.51,
6369
    'screen painter default'       =>  13.33,
6370
    'sequal'                       =>   6.67,
6371
    'Bourne Shell'                 =>   3.81,
6372
    'Bourne Again Shell'           =>   3.81,
6373
    'ksh'                          =>   3.81,
6374
    'C Shell'                      =>   3.81,
6375
    'siebel tools '                =>   6.15,
6376
    'SAS'                          =>   1.5 ,
6377
    'SASS'                         =>   1.5 ,
6378
    'simplan'                      =>   2.22,
6379
    'simscript'                    =>   1.74,
6380
    'simula'                       =>   1.74,
6381
    'simula 67'                    =>   1.74,
6382
    'simulation default'           =>   1.74,
6383
    'SKILL'                        =>   2.00,
6384
    'SKILL++'                      =>   2.00,
6385
    'slogan'                       =>   0.98,
6386
    'smalltalk'                    =>   2.50,
6387
    'smalltalk 286'                =>   3.81,
6388
    'smalltalk 80'                 =>   3.81,
6389
    'smalltalk/v'                  =>   3.81,
6390
    'Smarty'                       =>   3.50,
6391
    'snap'                         =>   1.00,
6392
    'snobol24'                     =>   0.63,
6393
    'softscreen'                   =>   5.71,
6394
    'Softbridge Basic'             =>   2.76,
6395
    'solo'                         =>   1.38,
6396
    'speakeasy'                    =>   2.22,
6397
    'spinnaker ppl'                =>   2.22,
6398
    'splus'                        =>   2.50,
6399
    'spreadsheet default'          =>  13.33,
6400
    'sps'                          =>   0.25,
6401
    'spss'                         =>   2.50,
6402
    'SQL'                          =>   2.29,
6403
    'sqlwindows'                   =>   6.67,
6404
    'statistical default'          =>   2.50,
6405
    'Standard ML'                  =>   3.00,
6406
    'strategem'                    =>   2.22,
6407
    'stress'                       =>   1.13,
6408
    'strongly typed default'       =>   0.88,
6409
    'style'                        =>   1.74,
6410
    'superbase 1.3'                =>   2.22,
6411
    'surpass'                      =>  13.33,
6412
    'Swift'                        =>   2.50,
6413
    'sybase'                       =>   2.00,
6414
    'symantec c++'                 =>   2.76,
6415
    'symbolang'                    =>   1.25,
6416
    'synchroworks'                 =>   4.44,
6417
    'synon/2e'                     =>   4.21,
6418
    'systemw'                      =>   2.22,
6419
    'tandem access language'       =>   0.88,
6420
    'Tcl/Tk'                       =>   4.00,
6421
    'Teamcenter def'               =>   1.00,
6422
    'Teamcenter met'               =>   1.00,
6423
    'Teamcenter mth'               =>   1.00,
6424
    'telon'                        =>   5.00,
6425
    'tessaract'                    =>   2.00,
6426
    'the twin'                     =>  13.33,
6427
    'themis'                       =>   6.15,
6428
    'tiief'                        =>   5.71,
6429
    'Titanium Style Sheet'         =>   2.00,
6430
    'topspeed c++'                 =>   2.76,
6431
    'transform'                    =>   5.33,
6432
    'translisp plus'               =>   1.43,
6433
    'treet'                        =>   1.25,
6434
    'treetran'                     =>   1.25,
6435
    'trs80 basic'                  =>   0.63,
6436
    'true basic'                   =>   1.25,
6437
    'turbo c'                      =>   0.63,
6438
    'turbo expert'                 =>   1.63,
6439
    'turbo pascal >5'              =>   1.63,
6440
    'turbo pascal 14'              =>   1.00,
6441
    'turbo pascal 45'              =>   1.13,
6442
    'turing'                       =>   1.00,
6443
    'tutor'                        =>   1.51,
6444
    'twaice'                       =>   1.63,
6445
    'TypeScript'                   =>   2.00,
6446
    'ucsd pascal'                  =>   0.88,
6447
    'ufo/ims'                      =>   2.22,
6448
    'uhelp'                        =>   2.50,
6449
    'uniface'                      =>   5.00,
6450
    'Unity-Prefab'                 =>   2.50,
6451
    'Vala'                         =>   1.50,
6452
    'Vala Header'                  =>   1.40,
6453
    'vax acms'                     =>   1.38,
6454
    'vax ade'                      =>   2.00,
6455
    'vbscript'                     =>   2.35,
6456
    'vectran'                      =>   0.75,
6457
    'Verilog-SystemVerilog'        =>   1.51,
6458
    'VHDL'                         =>   4.21,
6459
    'vim script'                   =>   3.00,
6460
    'visible c'                    =>   1.63,
6461
    'visible cobol'                =>   2.00,
6462
    'visicalc 1'                   =>   8.89,
6463
    'visual 4.0'                   =>   2.76,
6464
    'visual basic'                 =>   1.90,
6465
    'visual basic 1'               =>   1.74,
6466
    'visual basic 2'               =>   1.86,
6467
    'visual basic 3'               =>   2.00,
6468
    'visual basic 4'               =>   2.22,
6469
    'visual basic 5'               =>   2.76,
6470
    'Visual Basic'                 =>   2.76,
6471
    'visual basic dos'             =>   2.00,
6472
    'visual c++'                   =>   2.35,
6473
    'visual cobol'                 =>   4.00,
6474
    'Visual Fox Pro'               =>   4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
6475
    'visual objects'               =>   5.00,
6476
    'visualage'                    =>   3.81,
6477
    'Visualforce Component'        =>   1.9 ,
6478
    'Visualforce Page'             =>   1.9 ,
6479
    'visualgen'                    =>   4.44,
6480
    'VM'                           =>   2.00,
6481
    'vpf'                          =>   0.84,
6482
    'vulcan'                       =>   1.25,
6483
    'vz programmer'                =>   2.22,
6484
    'warp x'                       =>   2.00,
6485
    'watcom c'                     =>   0.63,
6486
    'watcom c/386'                 =>   0.63,
6487
    'waterloo c'                   =>   0.63,
6488
    'waterloo pascal'              =>   0.88,
6489
    'watfiv'                       =>   0.94,
6490
    'watfor'                       =>   0.88,
6491
    'web scripts'                  =>   5.33,
6492
    'whip'                         =>   0.88,
6493
    'Windows Message File'         =>   1.00,
6494
    'Windows Resource File'        =>   1.00,
6495
    'Windows Module Definition'    =>   1.00,
6496
    'WiX source'                   =>   1.90,
6497
    'WiX include'                  =>   1.90,
6498
    'WiX string localization'      =>   1.90,
6499
    'wizard'                       =>   2.86,
6500
    'xBase'                        =>   2.00,
6501
    'xBase Header'                 =>   2.00,
6502
    'xlisp'                        =>   1.25,
6503
    'XAML'                         =>   1.90,
6504
    'XQuery'                       =>   2.50,
6505
    'yacc'                         =>   1.51,
6506
    'yacc++'                       =>   1.51,
6507
    'YAML'                         =>   0.90,
6508
    'zbasic'                       =>   0.88,
6509
    'zim'                          =>   4.21,
6510
    'zlisp'                        =>   1.25,
6511
    'Expect'                       => 2.00,
6512
    'C/C++ Header'                 => 1.00, 
6513
    'inc'                          => 1.00,
6514
    'lex'                          => 1.00,
6515
    'Julia'                        => 4.00,
6516
    'MATLAB'                       => 4.00,
6517
    'Mercury'                      => 3.00,
6518
    'Maven/XML'                    => 2.5,
6519
    'IDL'                          => 3.80,
6520
    'Octave'                       => 4.00,
6521
    'ML'                           => 3.00,
6522
    'Modula3'                      => 2.00,
6523
    'PHP'                          => 3.50,
6524
    'Python'                       => 4.20,
6525
    'Cython'                       => 3.80,
6526
    'Ruby'                         => 4.20,
6527
    'Ruby HTML'                    => 4.00,
6528
    'sed'                          => 4.00,
6529
    'Lua'                          => 4.00,
6530
    'OpenCL'                       => 1.50,
6531
#   'Lisp/Julia'                   => 4.00,
6532
#   'Lisp/OpenCL'                  => 1.50,
6533
#   'MATLAB/Objective C/MUMPS/Mercury' => 3.00,
6534
);
6535
# 1}}}
6536
%{$rh_Known_Binary_Archives} = (             # {{{1
6537
            '.tar'     => 1 ,
6538
            '.tar.Z'   => 1 ,
6539
            '.tar.gz'  => 1 ,
6540
            '.tar.bz2' => 1 ,
6541
            '.zip'     => 1 ,
6542
            '.Zip'     => 1 ,
6543
            '.ZIP'     => 1 ,
6544
            '.ear'     => 1 ,  # Java
6545
            '.war'     => 1 ,  # contained within .ear
6546
            '.xz'      => 1 ,
6547
            );
6548
# 1}}}
6549
} # end sub set_constants()
6550
sub check_scale_existence {                  # {{{1
6551
    # do a few sanity checks
6552
    my ($rhaa_Filters_by_Language, 
6553
        $rh_Language_by_Extension,
6554
        $rh_Scale_Factor) = @_;
6555
 
6556
    my %extension_collisions = (
6557
        # TODO:  find a better way of dealing with these
6558
        "PHP/Pascal"                        => 1,
6559
        "Lisp/OpenCL"                       => 1,
6560
        "Lisp/Julia"                        => 1,
6561
        "MATLAB/Objective C/MUMPS/Mercury"  => 1,
6562
        "Pascal/Puppet"                     => 1,
6563
        "Perl/Prolog"                       => 1,
6564
        "IDL/Qt Project/Prolog"             => 1,
6565
        "D/dtrace"                          => 1,
6566
    );
6567
    my $OK = 1;
6568
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
6569
        next if defined $extension_collisions{$language};
6570
        if (!defined $rh_Scale_Factor->{$language}) {
6571
            $OK = 0;
6572
            warn "Missing scale factor for $language\n";
6573
        }
6574
    }
6575
 
6576
    my %seen_it = ();
6577
    foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
6578
        my $language = $rh_Language_by_Extension->{$ext};
6579
        next if defined $extension_collisions{$language};
6580
        next if $seen_it{$language};
6581
        if (!@{$rhaa_Filters_by_Language->{$language}}) {
6582
            $OK = 0;
6583
            warn "Missing language filter for $language\n";
6584
        }
6585
        $seen_it{$language} = 1;
6586
    }
6587
    die unless $OK;
6588
} # 1}}}
6589
sub Install_Regexp_Common {                  # {{{1
6590
    # Installs portions of Damian Conway's & Abigail's Regexp::Common
6591
    # module, v2.120, into a temporary directory for the duration of
6592
    # this run.
6593
 
6594
    my %Regexp_Common_Contents = ();
6595
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
6596
package Regexp::Common;
6597
 
6598
use 5.00473;
6599
use strict;
6600
 
6601
local $^W = 1;
6602
 
6603
use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/;
6604
 
6605
($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/;
6606
 
6607
 
6608
sub _croak {
6609
    require Carp;
6610
    goto &Carp::croak;
6611
}
6612
 
6613
sub _carp {
6614
    require Carp;
6615
    goto &Carp::carp;
6616
}
6617
 
6618
sub new {
6619
    my ($class, @data) = @_;
6620
    my %self;
6621
    tie %self, $class, @data;
6622
    return \%self;
6623
}
6624
 
6625
sub TIEHASH {
6626
    my ($class, @data) = @_;
6627
    bless \@data, $class;
6628
}
6629
 
6630
sub FETCH {
6631
    my ($self, $extra) = @_;
6632
    return bless ref($self)->new(@$self, $extra), ref($self);
6633
}
6634
 
6635
# Modification for cloc:  only need a few modules from Regexp::Common.
6636
my %imports = map {$_ => "Regexp::Common::$_"}
6637
              qw /balanced comment delimited /;
6638
#my %imports = map {$_ => "Regexp::Common::$_"}
6639
#              qw /balanced CC     comment   delimited lingua list
6640
#                  net      number profanity SEN       URI    whitespace
6641
#                  zip/;
6642
 
6643
sub import {
6644
    shift;  # Shift off the class.
6645
    tie %RE, __PACKAGE__;
6646
    {
6647
        no strict 'refs';
6648
        *{caller() . "::RE"} = \%RE;
6649
    }
6650
 
6651
    my $saw_import;
6652
    my $no_defaults;
6653
    my %exclude;
6654
    foreach my $entry (grep {!/^RE_/} @_) {
6655
        if ($entry eq 'pattern') {
6656
            no strict 'refs';
6657
            *{caller() . "::pattern"} = \&pattern;
6658
            next;
6659
        }
6660
        # This used to prevent $; from being set. We still recognize it,
6661
        # but we won't do anything.
6662
        if ($entry eq 'clean') {
6663
            next;
6664
        }
6665
        if ($entry eq 'no_defaults') {
6666
            $no_defaults ++;
6667
            next;
6668
        }
6669
        if (my $module = $imports {$entry}) {
6670
            $saw_import ++;
6671
            eval "require $module;";
6672
            die $@ if $@;
6673
            next;
6674
        }
6675
        if ($entry =~ /^!(.*)/ && $imports {$1}) {
6676
            $exclude {$1} ++;
6677
            next;
6678
        }
6679
        # As a last resort, try to load the argument.
6680
        my $module = $entry =~ /^Regexp::Common/
6681
                            ? $entry
6682
                            : "Regexp::Common::" . $entry;
6683
        eval "require $module;";
6684
        die $@ if $@;
6685
    }
6686
 
6687
    unless ($saw_import || $no_defaults) {
6688
        foreach my $module (values %imports) {
6689
            next if $exclude {$module};
6690
            eval "require $module;";
6691
            die $@ if $@;
6692
        }
6693
    }
6694
 
6695
    my %exported;
6696
    foreach my $entry (grep {/^RE_/} @_) {
6697
        if ($entry =~ /^RE_(\w+_)?ALL$/) {
6698
            my $m  = defined $1 ? $1 : "";
6699
            my $re = qr /^RE_${m}.*$/;
6700
            while (my ($sub, $interface) = each %sub_interface) {
6701
                next if $exported {$sub};
6702
                next unless $sub =~ /$re/;
6703
                {
6704
                    no strict 'refs';
6705
                    *{caller() . "::$sub"} = $interface;
6706
                }
6707
                $exported {$sub} ++;
6708
            }
6709
        }
6710
        else {
6711
            next if $exported {$entry};
6712
            _croak "Can't export unknown subroutine &$entry"
6713
                unless $sub_interface {$entry};
6714
            {
6715
                no strict 'refs';
6716
                *{caller() . "::$entry"} = $sub_interface {$entry};
6717
            }
6718
            $exported {$entry} ++;
6719
        }
6720
    }
6721
}
6722
 
6723
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
6724
 
6725
sub DESTROY {}
6726
 
6727
my %cache;
6728
 
6729
my $fpat = qr/^(-\w+)/;
6730
 
6731
sub _decache {
6732
        my @args = @{tied %{$_[0]}};
6733
        my @nonflags = grep {!/$fpat/} @args;
6734
        my $cache = get_cache(@nonflags);
6735
        _croak "Can't create unknown regex: \$RE{"
6736
            . join("}{",@args) . "}"
6737
                unless exists $cache->{__VAL__};
6738
        _croak "Perl $] does not support the pattern "
6739
            . "\$RE{" . join("}{",@args)
6740
            . "}.\nYou need Perl $cache->{__VAL__}{version} or later"
6741
                unless ($cache->{__VAL__}{version}||0) <= $];
6742
        my %flags = ( %{$cache->{__VAL__}{default}},
6743
                      map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
6744
                          : /$fpat/           ? ($1 => undef)
6745
                          :                     ()
6746
                          } @args);
6747
        $cache->{__VAL__}->_clone_with(\@args, \%flags);
6748
}
6749
 
6750
use overload q{""} => \&_decache;
6751
 
6752
 
6753
sub get_cache {
6754
        my $cache = \%cache;
6755
        foreach (@_) {
6756
                $cache = $cache->{$_}
6757
                      || ($cache->{$_} = {});
6758
        }
6759
        return $cache;
6760
}
6761
 
6762
sub croak_version {
6763
        my ($entry, @args) = @_;
6764
}
6765
 
6766
sub pattern {
6767
        my %spec = @_;
6768
        _croak 'pattern() requires argument: name => [ @list ]'
6769
                unless $spec{name} && ref $spec{name} eq 'ARRAY';
6770
        _croak 'pattern() requires argument: create => $sub_ref_or_string'
6771
                unless $spec{create};
6772
 
6773
        if (ref $spec{create} ne "CODE") {
6774
                my $fixed_str = "$spec{create}";
6775
                $spec{create} = sub { $fixed_str }
6776
        }
6777
 
6778
        my @nonflags;
6779
        my %default;
6780
        foreach ( @{$spec{name}} ) {
6781
                if (/$fpat=(.*)/) {
6782
                        $default{$1} = $2;
6783
                }
6784
                elsif (/$fpat\s*$/) {
6785
                        $default{$1} = undef;
6786
                }
6787
                else {
6788
                        push @nonflags, $_;
6789
                }
6790
        }
6791
 
6792
        my $entry = get_cache(@nonflags);
6793
 
6794
        if ($entry->{__VAL__}) {
6795
                _carp "Overriding \$RE{"
6796
                   . join("}{",@nonflags)
6797
                   . "}";
6798
        }
6799
 
6800
        $entry->{__VAL__} = bless {
6801
                                create  => $spec{create},
6802
                                match   => $spec{match} || \&generic_match,
6803
                                subs    => $spec{subs}  || \&generic_subs,
6804
                                version => $spec{version},
6805
                                default => \%default,
6806
                            }, 'Regexp::Common::Entry';
6807
 
6808
        foreach (@nonflags) {s/\W/X/g}
6809
        my $subname = "RE_" . join ("_", @nonflags);
6810
        $sub_interface{$subname} = sub {
6811
                push @_ => undef if @_ % 2;
6812
                my %flags = @_;
6813
                my $pat = $spec{create}->($entry->{__VAL__},
6814
                               {%default, %flags}, \@nonflags);
6815
                if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
6816
                else { $pat =~ s/\Q(?k:/(?:/g; }
6817
                return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
6818
        };
6819
 
6820
        return 1;
6821
}
6822
 
6823
sub generic_match {$_ [1] =~  /$_[0]/}
6824
sub generic_subs  {$_ [1] =~ s/$_[0]/$_[2]/}
6825
 
6826
sub matches {
6827
        my ($self, $str) = @_;
6828
        my $entry = $self -> _decache;
6829
        $entry -> {match} -> ($entry, $str);
6830
}
6831
 
6832
sub subs {
6833
        my ($self, $str, $newstr) = @_;
6834
        my $entry = $self -> _decache;
6835
        $entry -> {subs} -> ($entry, $str, $newstr);
6836
        return $str;
6837
}
6838
 
6839
 
6840
package Regexp::Common::Entry;
6841
# use Carp;
6842
 
6843
local $^W = 1;
6844
 
6845
use overload
6846
    q{""} => sub {
6847
        my ($self) = @_;
6848
        my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
6849
        if (exists $self->{flags}{-keep}) {
6850
            $pat =~ s/\Q(?k:/(/g;
6851
        }
6852
        else {
6853
            $pat =~ s/\Q(?k:/(?:/g;
6854
        }
6855
        if (exists $self->{flags}{-i})   { $pat = "(?i)$pat" }
6856
        return $pat;
6857
    };
6858
 
6859
sub _clone_with {
6860
    my ($self, $args, $flags) = @_;
6861
    bless { %$self, args=>$args, flags=>$flags }, ref $self;
6862
}
6863
# 
6864
#    Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights
6865
#  Reserved. This module is free software. It may be used, redistributed
6866
#      and/or modified under the terms of the Perl Artistic License
6867
#            (see http://www.perl.com/perl/misc/Artistic.html)
6868
EOCommon
6869
# 2}}}
6870
$Regexp_Common_Contents{'Common/comment'} = <<'EOC';   # {{{2
6871
# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $
6872
 
6873
package Regexp::Common::comment;
6874
 
6875
use strict;
6876
local $^W = 1;
6877
 
6878
use Regexp::Common qw /pattern clean no_defaults/;
6879
use vars qw /$VERSION/;
6880
 
6881
($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g;
6882
 
6883
my @generic = (
6884
    {languages => [qw /ABC Forth/],
6885
     to_eol    => ['\\\\']},   # This is for just a *single* backslash.
6886
 
6887
    {languages => [qw /Ada Alan Eiffel lua/],
6888
     to_eol    => ['--']},
6889
 
6890
    {languages => [qw /Advisor/],
6891
     to_eol    => ['#|//']},
6892
 
6893
    {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
6894
                       SMITH zonefile/],
6895
     to_eol    => [';']},
6896
 
6897
    {languages => ['Algol 60'],
6898
     from_to   => [[qw /comment ;/]]},
6899
 
6900
    {languages => [qw {ALPACA B C C-- LPC PL/I}],
6901
     from_to   => [[qw {/* */}]]},
6902
 
6903
    {languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/],
6904
     to_eol    => ['#']},
6905
 
6906
    {languages => [[BASIC => 'mvEnterprise']],
6907
     to_eol    => ['[*!]|REM']},
6908
 
6909
    {languages => [qw /Befunge-98 Funge-98 Shelta/],
6910
     id        => [';']},
6911
 
6912
    {languages => ['beta-Juliet', 'Crystal Report', 'Portia'],
6913
     to_eol    => ['//']},
6914
 
6915
    {languages => ['BML'],
6916
     from_to   => [['<?_c', '_c?>']],
6917
    },
6918
 
6919
    {languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/],
6920
     to_eol    => ['//'],
6921
     from_to   => [[qw {/* */}]]},
6922
 
6923
    {languages => [qw /CLU LaTeX slrn TeX/],
6924
     to_eol    => ['%']},
6925
 
6926
    {languages => [qw /False/],
6927
     from_to   => [[qw !{ }!]]},
6928
 
6929
    {languages => [qw /Fortran/],
6930
     to_eol    => ['!']},
6931
 
6932
    {languages => [qw /Haifu/],
6933
     id        => [',']},
6934
 
6935
    {languages => [qw /ILLGOL/],
6936
     to_eol    => ['NB']},
6937
 
6938
    {languages => [qw /INTERCAL/],
6939
     to_eol    => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
6940
 
6941
    {languages => [qw /J/],
6942
     to_eol    => ['NB[.]']},
6943
 
6944
    {languages => [qw /Nickle/],
6945
     to_eol    => ['#'],
6946
     from_to   => [[qw {/* */}]]},
6947
 
6948
    {languages => [qw /Oberon/],
6949
     from_to   => [[qw /(* *)/]]},
6950
 
6951
    {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
6952
     to_eol    => ['//'],
6953
     from_to   => [[qw !{ }!], [qw !(* *)!]]},
6954
 
6955
    {languages => [[qw /Pascal Workshop/]],
6956
     id        => [qw /"/],
6957
     from_to   => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
6958
 
6959
    {languages => [qw /PEARL/],
6960
     to_eol    => ['!'],
6961
     from_to   => [[qw {/* */}]]},
6962
 
6963
    {languages => [qw /PHP/],
6964
     to_eol    => ['#', '//'],
6965
     from_to   => [[qw {/* */}]]},
6966
 
6967
    {languages => [qw !PL/B!],
6968
     to_eol    => ['[.;]']},
6969
 
6970
    {languages => [qw !PL/SQL!],
6971
     to_eol    => ['--'],
6972
     from_to   => [[qw {/* */}]]},
6973
 
6974
    {languages => [qw /Q-BAL/],
6975
     to_eol    => ['`']},
6976
 
6977
    {languages => [qw /Smalltalk/],
6978
     id        => ['"']},
6979
 
6980
    {languages => [qw /SQL/],
6981
     to_eol    => ['-{2,}']},
6982
 
6983
    {languages => [qw /troff/],
6984
     to_eol    => ['\\\"']},
6985
 
6986
    {languages => [qw /vi/],
6987
     to_eol    => ['"']},
6988
 
6989
    {languages => [qw /*W/],
6990
     from_to   => [[qw {|| !!}]]},
6991
);
6992
 
6993
my @plain_or_nested = (
6994
   [Caml         =>  undef,       "(*"  => "*)"],
6995
   [Dylan        =>  "//",        "/*"  => "*/"],
6996
   [Haskell      =>  "-{2,}",     "{-"  => "-}"],
6997
   [Hugo         =>  "!(?!\\\\)", "!\\" => "\\!"],
6998
   [SLIDE        =>  "#",         "(*"  => "*)"],
6999
);
7000
 
7001
#
7002
# Helper subs.
7003
#
7004
 
7005
sub combine      {
7006
    local $_ = join "|", @_;
7007
    if (@_ > 1) {
7008
        s/\(\?k:/(?:/g;
7009
        $_ = "(?k:$_)";
7010
    }
7011
    $_
7012
}
7013
 
7014
sub to_eol  ($)  {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
7015
sub id      ($)  {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"}  # One char only!
7016
sub from_to      {
7017
    local $^W = 1;
7018
    my ($begin, $end) = @_;
7019
 
7020
    my $qb  = quotemeta $begin;
7021
    my $qe  = quotemeta $end;
7022
    my $fe  = quotemeta substr $end   => 0, 1;
7023
    my $te  = quotemeta substr $end   => 1;
7024
 
7025
    "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
7026
}
7027
 
7028
 
7029
my $count = 0;
7030
sub nested {
7031
    local $^W = 1;
7032
    my ($begin, $end) = @_;
7033
 
7034
    $count ++;
7035
    my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
7036
 
7037
    my $qb  = quotemeta $begin;
7038
    my $qe  = quotemeta $end;
7039
    my $fb  = quotemeta substr $begin => 0, 1;
7040
    my $fe  = quotemeta substr $end   => 0, 1;
7041
 
7042
    my $tb  = quotemeta substr $begin => 1;
7043
    my $te  = quotemeta substr $end   => 1;
7044
 
7045
    use re 'eval';
7046
 
7047
    my $re;
7048
    if ($fb eq $fe) {
7049
        $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
7050
    }
7051
    else {
7052
        local $"      =  "|";
7053
        my   @clauses =  "(?>[^$fb$fe]+)";
7054
        push @clauses => "$fb(?!$tb)" if length $tb;
7055
        push @clauses => "$fe(?!$te)" if length $te;
7056
        push @clauses =>  $r;
7057
        $re           =   qr /(?:$qb(?:@clauses)*$qe)/;
7058
    }
7059
 
7060
    $Regexp::Common::comment [$count] = qr/$re/;
7061
}
7062
 
7063
#
7064
# Process data.
7065
#
7066
 
7067
foreach my $info (@plain_or_nested) {
7068
    my ($language, $mark, $begin, $end) = @$info;
7069
    pattern name    => [comment => $language],
7070
            create  =>
7071
                sub {my $re     = nested $begin => $end;
7072
                     my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
7073
                     exists $_ [1] -> {-keep} ? qr /($prefix$re)/
7074
                                              : qr  /$prefix$re/
7075
                },
7076
            version => 5.006,
7077
            ;
7078
}
7079
 
7080
 
7081
foreach my $group (@generic) {
7082
    my $pattern = combine +(map {to_eol   $_} @{$group -> {to_eol}}),
7083
                           (map {from_to @$_} @{$group -> {from_to}}),
7084
                           (map {id       $_} @{$group -> {id}}),
7085
                  ;
7086
    foreach my $language  (@{$group -> {languages}}) {
7087
        pattern name    => [comment => ref $language ? @$language : $language],
7088
                create  => $pattern,
7089
                ;
7090
    }
7091
}
7092
 
7093
 
7094
 
7095
#
7096
# Other languages.
7097
#
7098
 
7099
# http://www.pascal-central.com/docs/iso10206.txt
7100
pattern name    => [qw /comment Pascal/],
7101
        create  => '(?k:' . '(?k:[{]|[(][*])'
7102
                          . '(?k:[^}*]*(?:[*][^)][^}*]*)*)'
7103
                          . '(?k:[}]|[*][)])'
7104
                          . ')'
7105
        ;
7106
 
7107
# http://www.templetons.com/brad/alice/language/
7108
pattern name    =>  [qw /comment Pascal Alice/],
7109
        create  =>  '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
7110
        ;
7111
 
7112
 
7113
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
7114
pattern name    => [qw (comment), 'Algol 68'],
7115
        create  => q {(?k:(?:#[^#]*#)|}                           .
7116
                   q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
7117
                   q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
7118
        ;
7119
 
7120
 
7121
# See rules 91 and 92 of ISO 8879 (SGML).
7122
# Charles F. Goldfarb: "The SGML Handbook".
7123
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
7124
# Ch. 10.3, pp 390.
7125
pattern name    => [qw (comment HTML)],
7126
        create  => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
7127
        ;
7128
 
7129
 
7130
pattern name    => [qw /comment SQL MySQL/],
7131
        create  => q {(?k:(?:#|-- )[^\n]*\n|} .
7132
                   q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
7133
        ;
7134
 
7135
# Anything that isn't <>[]+-.,
7136
# http://home.wxs.nl/~faase009/Ha_BF.html
7137
pattern name    => [qw /comment Brainfuck/],
7138
        create  => '(?k:[^<>\[\]+\-.,]+)'
7139
        ;
7140
 
7141
# Squeak is a variant of Smalltalk-80.
7142
# http://www.squeak.
7143
# http://mucow.com/squeak-qref.html
7144
pattern name    => [qw /comment Squeak/],
7145
        create  => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
7146
        ;
7147
 
7148
#
7149
# Scores of less than 5 or above 17....
7150
# http://www.cliff.biffle.org/esoterica/beatnik.html
7151
@Regexp::Common::comment::scores = (1,  3,  3,  2,  1,  4,  2,  4,  1,  8,
7152
                                    5,  1,  3,  1,  1,  3, 10,  1,  1,  1,
7153
                                    1,  4,  4,  8,  4, 10);
7154
pattern name    =>  [qw /comment Beatnik/],
7155
        create  =>  sub {
7156
            use re 'eval';
7157
            my ($s, $x);
7158
            my $re = qr {\b([A-Za-z]+)\b
7159
                         (?(?{($s, $x) = (0, lc $^N);
7160
                              $s += $Regexp::Common::comment::scores
7161
                                    [ord (chop $x) - ord ('a')] while length $x;
7162
                              $s  >= 5 && $s < 18})XXX|)}x;
7163
            $re;
7164
        },
7165
        version  => 5.008,
7166
        ;
7167
 
7168
 
7169
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
7170
#  (Goto table of contents/3.3 Source Form)
7171
# Fortran, in fixed format. Comments start with a C, c or * in the first
7172
# column, or a ! anywhere, but the sixth column. Then end with a newline.
7173
pattern name    =>  [qw /comment Fortran fixed/],
7174
        create  =>  '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
7175
        ;
7176
 
7177
 
7178
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
7179
# Traditionally, comments in COBOL were indicated with an asteriks in
7180
# the seventh column. Modern compilers may be more lenient.
7181
pattern name    =>  [qw /comment COBOL/],
7182
        create  =>  '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
7183
        version =>  '5.008',
7184
        ;
7185
 
7186
1;
7187
#
7188
#    Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7189
#      This module is free software. It may be used, redistributed
7190
#     and/or modified under the terms of the Perl Artistic License
7191
#           (see http://www.perl.com/perl/misc/Artistic.html)
7192
EOC
7193
# 2}}}
7194
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB';   # {{{2
7195
package Regexp::Common::balanced; {
7196
 
7197
use strict;
7198
local $^W = 1;
7199
 
7200
use vars qw /$VERSION/;
7201
($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g;
7202
 
7203
use Regexp::Common qw /pattern clean no_defaults/;
7204
 
7205
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
7206
my $count = -1;
7207
my %cache;
7208
 
7209
sub nested {
7210
    local $^W = 1;
7211
    my ($start, $finish) = @_;
7212
 
7213
    return $Regexp::Common::balanced [$cache {$start} {$finish}]
7214
            if exists $cache {$start} {$finish};
7215
 
7216
    $count ++;
7217
    my $r = '(??{$Regexp::Common::balanced ['. $count . ']})';
7218
 
7219
    my @starts   = map {s/\\(.)/$1/g; $_} grep {length}
7220
                        $start  =~ /([^|\\]+|\\.)+/gs;
7221
    my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
7222
                        $finish =~ /([^|\\]+|\\.)+/gs;
7223
 
7224
    push @finishes => ($finishes [-1]) x (@starts - @finishes);
7225
 
7226
    my @re;
7227
    local $" = "|";
7228
    foreach my $begin (@starts) {
7229
        my $end = shift @finishes;
7230
 
7231
        my $qb  = quotemeta $begin;
7232
        my $qe  = quotemeta $end;
7233
        my $fb  = quotemeta substr $begin => 0, 1;
7234
        my $fe  = quotemeta substr $end   => 0, 1;
7235
 
7236
        my $tb  = quotemeta substr $begin => 1;
7237
        my $te  = quotemeta substr $end   => 1;
7238
 
7239
        use re 'eval';
7240
 
7241
        my $add;
7242
        if ($fb eq $fe) {
7243
            push @re =>
7244
                   qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
7245
        }
7246
        else {
7247
            my   @clauses =  "(?>[^$fb$fe]+)";
7248
            push @clauses => "$fb(?!$tb)" if length $tb;
7249
            push @clauses => "$fe(?!$te)" if length $te;
7250
            push @clauses =>  $r;
7251
            push @re      =>  qr /(?:$qb(?:@clauses)*$qe)/;
7252
        }
7253
    }
7254
 
7255
    $cache {$start} {$finish} = $count;
7256
    $Regexp::Common::balanced [$count] = qr/@re/;
7257
}
7258
 
7259
 
7260
pattern name    => [qw /balanced -parens=() -begin= -end=/],
7261
        create  => sub {
7262
            my $flag = $_[1];
7263
            unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
7264
                    defined $flag -> {-end}   && length $flag -> {-end}) {
7265
                my @open  = grep {index ($flag->{-parens}, $_) >= 0}
7266
                             ('[','(','{','<');
7267
                my @close = map {$closer {$_}} @open;
7268
                $flag -> {-begin} = join "|" => @open;
7269
                $flag -> {-end}   = join "|" => @close;
7270
            }
7271
            my $pat = nested @$flag {qw /-begin -end/};
7272
            return exists $flag -> {-keep} ? qr /($pat)/ : $pat;
7273
        },
7274
        version => 5.006,
7275
        ;
7276
 
7277
}
7278
 
7279
1;
7280
#
7281
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7282
#       This module is free software. It may be used, redistributed
7283
#      and/or modified under the terms of the Perl Artistic License
7284
#            (see http://www.perl.com/perl/misc/Artistic.html)
7285
EOB
7286
# 2}}}
7287
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD';   # {{{2
7288
# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $
7289
 
7290
package Regexp::Common::delimited;
7291
 
7292
use strict;
7293
local $^W = 1;
7294
 
7295
use Regexp::Common qw /pattern clean no_defaults/;
7296
use vars qw /$VERSION/;
7297
 
7298
($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g;
7299
 
7300
sub gen_delimited {
7301
 
7302
    my ($dels, $escs) = @_;
7303
    # return '(?:\S*)' unless $dels =~ /\S/;
7304
    if (length $escs) {
7305
        $escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
7306
    }
7307
    my @pat = ();
7308
    my $i;
7309
    for ($i=0; $i < length $dels; $i++) {
7310
        my $del = quotemeta substr ($dels, $i, 1);
7311
        my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : "";
7312
        if ($del eq $esc) {
7313
            push @pat,
7314
                 "(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)";
7315
        }
7316
        elsif (length $esc) {
7317
            push @pat,
7318
                 "(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)";
7319
        }
7320
        else {
7321
            push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)";
7322
        }
7323
    }
7324
    my $pat = join '|', @pat;
7325
    return "(?k:$pat)";
7326
}
7327
 
7328
sub _croak {
7329
    require Carp;
7330
    goto &Carp::croak;
7331
}
7332
 
7333
pattern name   => [qw( delimited -delim= -esc=\\ )],
7334
        create => sub {my $flags = $_[1];
7335
                       _croak 'Must specify delimiter in $RE{delimited}'
7336
                             unless length $flags->{-delim};
7337
                       return gen_delimited (@{$flags}{-delim, -esc});
7338
                  },
7339
        ;
7340
 
7341
pattern name   => [qw( quoted -esc=\\ )],
7342
        create => sub {my $flags = $_[1];
7343
                       return gen_delimited (q{"'`}, $flags -> {-esc});
7344
                  },
7345
        ;
7346
 
7347
 
7348
1;
7349
#
7350
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7351
#       This module is free software. It may be used, redistributed
7352
#      and/or modified under the terms of the Perl Artistic License
7353
#            (see http://www.perl.com/perl/misc/Artistic.html)
7354
EOD
7355
# 2}}}
7356
    my $problems        = 0;
7357
    $HAVE_Rexexp_Common = 0;
7358
    my $dir             = "";
7359
    if ($opt_sdir) {
7360
        ++$TEMP_OFF;
7361
        $dir = "$opt_sdir/$TEMP_OFF";
7362
        File::Path::rmtree($dir) if     is_dir($dir);
7363
        File::Path::mkpath($dir) unless is_dir($dir);
7364
    } else {
7365
        # let File::Temp create a suitable temporary directory
7366
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
7367
        $TEMP_INST{ $dir } = "Regexp::Common";
7368
    }
7369
    print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
7370
    my $Regexp_dir        = "$dir/Regexp";
7371
    my $Regexp_Common_dir = "$dir/Regexp/Common";
7372
    mkdir $Regexp_dir       ;
7373
    mkdir $Regexp_Common_dir;
7374
 
7375
    foreach my $module_file (keys %Regexp_Common_Contents) {
7376
        my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w";
7377
        if (defined $OUT) {
7378
            print $OUT $Regexp_Common_Contents{$module_file};
7379
            $OUT->close;
7380
        } else {
7381
            warn "Failed to install Regexp::${module_file}.pm\n";
7382
            $problems = 1;
7383
        }
7384
    }
7385
 
7386
    push @INC, $dir;
7387
    eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
7388
    $HAVE_Rexexp_Common = 1 unless $problems;
7389
} # 1}}}
7390
sub Install_Algorithm_Diff {                 # {{{1
7391
    # Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a 
7392
    # temporary directory for the duration of this run.
7393
 
7394
my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
7395
package Algorithm::Diff;
7396
# Skip to first "=head" line for documentation.
7397
use strict;
7398
 
7399
use integer;    # see below in _replaceNextLargerWith() for mod to make
7400
                # if you don't use this
7401
use vars qw( $VERSION @EXPORT_OK );
7402
$VERSION = 1.19_02;
7403
#          ^ ^^ ^^-- Incremented at will
7404
#          | \+----- Incremented for non-trivial changes to features
7405
#          \-------- Incremented for fundamental changes
7406
require Exporter;
7407
*import    = \&Exporter::import;
7408
@EXPORT_OK = qw(
7409
    prepare LCS LCSidx LCS_length
7410
    diff sdiff compact_diff
7411
    traverse_sequences traverse_balanced
7412
);
7413
 
7414
# McIlroy-Hunt diff algorithm
7415
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
7416
# by Ned Konz, perl@bike-nomad.com
7417
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
7418
 
7419
# Create a hash that maps each element of $aCollection to the set of
7420
# positions it occupies in $aCollection, restricted to the elements
7421
# within the range of indexes specified by $start and $end.
7422
# The fourth parameter is a subroutine reference that will be called to
7423
# generate a string to use as a key.
7424
# Additional parameters, if any, will be passed to this subroutine.
7425
#
7426
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
7427
 
7428
sub _withPositionsOfInInterval
7429
{
7430
    my $aCollection = shift;    # array ref
7431
    my $start       = shift;
7432
    my $end         = shift;
7433
    my $keyGen      = shift;
7434
    my %d;
7435
    my $index;
7436
    for ( $index = $start ; $index <= $end ; $index++ )
7437
    {
7438
        my $element = $aCollection->[$index];
7439
        my $key = &$keyGen( $element, @_ );
7440
        if ( exists( $d{$key} ) )
7441
        {
7442
            unshift ( @{ $d{$key} }, $index );
7443
        }
7444
        else
7445
        {
7446
            $d{$key} = [$index];
7447
        }
7448
    }
7449
    return wantarray ? %d : \%d;
7450
}
7451
 
7452
# Find the place at which aValue would normally be inserted into the
7453
# array. If that place is already occupied by aValue, do nothing, and
7454
# return undef. If the place does not exist (i.e., it is off the end of
7455
# the array), add it to the end, otherwise replace the element at that
7456
# point with aValue.  It is assumed that the array's values are numeric.
7457
# This is where the bulk (75%) of the time is spent in this module, so
7458
# try to make it fast!
7459
 
7460
sub _replaceNextLargerWith
7461
{
7462
    my ( $array, $aValue, $high ) = @_;
7463
    $high ||= $#$array;
7464
 
7465
    # off the end?
7466
    if ( $high == -1 || $aValue > $array->[-1] )
7467
    {
7468
        push ( @$array, $aValue );
7469
        return $high + 1;
7470
    }
7471
 
7472
    # binary search for insertion point...
7473
    my $low = 0;
7474
    my $index;
7475
    my $found;
7476
    while ( $low <= $high )
7477
    {
7478
        $index = ( $high + $low ) / 2;
7479
 
7480
        # $index = int(( $high + $low ) / 2);  # without 'use integer'
7481
        $found = $array->[$index];
7482
 
7483
        if ( $aValue == $found )
7484
        {
7485
            return undef;
7486
        }
7487
        elsif ( $aValue > $found )
7488
        {
7489
            $low = $index + 1;
7490
        }
7491
        else
7492
        {
7493
            $high = $index - 1;
7494
        }
7495
    }
7496
 
7497
    # now insertion point is in $low.
7498
    $array->[$low] = $aValue;    # overwrite next larger
7499
    return $low;
7500
}
7501
 
7502
# This method computes the longest common subsequence in $a and $b.
7503
 
7504
# Result is array or ref, whose contents is such that
7505
#   $a->[ $i ] == $b->[ $result[ $i ] ]
7506
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
7507
 
7508
# An additional argument may be passed; this is a hash or key generating
7509
# function that should return a string that uniquely identifies the given
7510
# element.  It should be the case that if the key is the same, the elements
7511
# will compare the same. If this parameter is undef or missing, the key
7512
# will be the element as a string.
7513
 
7514
# By default, comparisons will use "eq" and elements will be turned into keys
7515
# using the default stringizing operator '""'.
7516
 
7517
# Additional parameters, if any, will be passed to the key generation
7518
# routine.
7519
 
7520
sub _longestCommonSubsequence
7521
{
7522
    my $a        = shift;    # array ref or hash ref
7523
    my $b        = shift;    # array ref or hash ref
7524
    my $counting = shift;    # scalar
7525
    my $keyGen   = shift;    # code ref
7526
    my $compare;             # code ref
7527
 
7528
    if ( ref($a) eq 'HASH' )
7529
    {                        # prepared hash must be in $b
7530
        my $tmp = $b;
7531
        $b = $a;
7532
        $a = $tmp;
7533
    }
7534
 
7535
    # Check for bogus (non-ref) argument values
7536
    if ( !ref($a) || !ref($b) )
7537
    {
7538
        my @callerInfo = caller(1);
7539
        die 'error: must pass array or hash references to ' . $callerInfo[3];
7540
    }
7541
 
7542
    # set up code refs
7543
    # Note that these are optimized.
7544
    if ( !defined($keyGen) )    # optimize for strings
7545
    {
7546
        $keyGen = sub { $_[0] };
7547
        $compare = sub { my ( $a, $b ) = @_; $a eq $b };
7548
    }
7549
    else
7550
    {
7551
        $compare = sub {
7552
            my $a = shift;
7553
            my $b = shift;
7554
            &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
7555
        };
7556
    }
7557
 
7558
    my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
7559
    my ( $prunedCount, $bMatches ) = ( 0, {} );
7560
 
7561
    if ( ref($b) eq 'HASH' )    # was $bMatches prepared for us?
7562
    {
7563
        $bMatches = $b;
7564
    }
7565
    else
7566
    {
7567
        my ( $bStart, $bFinish ) = ( 0, $#$b );
7568
 
7569
        # First we prune off any common elements at the beginning
7570
        while ( $aStart <= $aFinish
7571
            and $bStart <= $bFinish
7572
            and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
7573
        {
7574
            $matchVector->[ $aStart++ ] = $bStart++;
7575
            $prunedCount++;
7576
        }
7577
 
7578
        # now the end
7579
        while ( $aStart <= $aFinish
7580
            and $bStart <= $bFinish
7581
            and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
7582
        {
7583
            $matchVector->[ $aFinish-- ] = $bFinish--;
7584
            $prunedCount++;
7585
        }
7586
 
7587
        # Now compute the equivalence classes of positions of elements
7588
        $bMatches =
7589
          _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
7590
    }
7591
    my $thresh = [];
7592
    my $links  = [];
7593
 
7594
    my ( $i, $ai, $j, $k );
7595
    for ( $i = $aStart ; $i <= $aFinish ; $i++ )
7596
    {
7597
        $ai = &$keyGen( $a->[$i], @_ );
7598
        if ( exists( $bMatches->{$ai} ) )
7599
        {
7600
            $k = 0;
7601
            for $j ( @{ $bMatches->{$ai} } )
7602
            {
7603
 
7604
                # optimization: most of the time this will be true
7605
                if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
7606
                {
7607
                    $thresh->[$k] = $j;
7608
                }
7609
                else
7610
                {
7611
                    $k = _replaceNextLargerWith( $thresh, $j, $k );
7612
                }
7613
 
7614
                # oddly, it's faster to always test this (CPU cache?).
7615
                if ( defined($k) )
7616
                {
7617
                    $links->[$k] =
7618
                      [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
7619
                }
7620
            }
7621
        }
7622
    }
7623
 
7624
    if (@$thresh)
7625
    {
7626
        return $prunedCount + @$thresh if $counting;
7627
        for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
7628
        {
7629
            $matchVector->[ $link->[1] ] = $link->[2];
7630
        }
7631
    }
7632
    elsif ($counting)
7633
    {
7634
        return $prunedCount;
7635
    }
7636
 
7637
    return wantarray ? @$matchVector : $matchVector;
7638
}
7639
 
7640
sub traverse_sequences
7641
{
7642
    my $a                 = shift;          # array ref
7643
    my $b                 = shift;          # array ref
7644
    my $callbacks         = shift || {};
7645
    my $keyGen            = shift;
7646
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
7647
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
7648
    my $finishedACallback = $callbacks->{'A_FINISHED'};
7649
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
7650
    my $finishedBCallback = $callbacks->{'B_FINISHED'};
7651
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
7652
 
7653
    # Process all the lines in @$matchVector
7654
    my $lastA = $#$a;
7655
    my $lastB = $#$b;
7656
    my $bi    = 0;
7657
    my $ai;
7658
 
7659
    for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
7660
    {
7661
        my $bLine = $matchVector->[$ai];
7662
        if ( defined($bLine) )    # matched
7663
        {
7664
            &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
7665
            &$matchCallback( $ai,    $bi++, @_ );
7666
        }
7667
        else
7668
        {
7669
            &$discardACallback( $ai, $bi, @_ );
7670
        }
7671
    }
7672
 
7673
    # The last entry (if any) processed was a match.
7674
    # $ai and $bi point just past the last matching lines in their sequences.
7675
 
7676
    while ( $ai <= $lastA or $bi <= $lastB )
7677
    {
7678
 
7679
        # last A?
7680
        if ( $ai == $lastA + 1 and $bi <= $lastB )
7681
        {
7682
            if ( defined($finishedACallback) )
7683
            {
7684
                &$finishedACallback( $lastA, @_ );
7685
                $finishedACallback = undef;
7686
            }
7687
            else
7688
            {
7689
                &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
7690
            }
7691
        }
7692
 
7693
        # last B?
7694
        if ( $bi == $lastB + 1 and $ai <= $lastA )
7695
        {
7696
            if ( defined($finishedBCallback) )
7697
            {
7698
                &$finishedBCallback( $lastB, @_ );
7699
                $finishedBCallback = undef;
7700
            }
7701
            else
7702
            {
7703
                &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
7704
            }
7705
        }
7706
 
7707
        &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
7708
        &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
7709
    }
7710
 
7711
    return 1;
7712
}
7713
 
7714
sub traverse_balanced
7715
{
7716
    my $a                 = shift;              # array ref
7717
    my $b                 = shift;              # array ref
7718
    my $callbacks         = shift || {};
7719
    my $keyGen            = shift;
7720
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
7721
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
7722
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
7723
    my $changeCallback    = $callbacks->{'CHANGE'};
7724
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
7725
 
7726
    # Process all the lines in match vector
7727
    my $lastA = $#$a;
7728
    my $lastB = $#$b;
7729
    my $bi    = 0;
7730
    my $ai    = 0;
7731
    my $ma    = -1;
7732
    my $mb;
7733
 
7734
    while (1)
7735
    {
7736
 
7737
        # Find next match indices $ma and $mb
7738
        do {
7739
            $ma++;
7740
        } while(
7741
                $ma <= $#$matchVector
7742
            &&  !defined $matchVector->[$ma]
7743
        );
7744
 
7745
        last if $ma > $#$matchVector;    # end of matchVector?
7746
        $mb = $matchVector->[$ma];
7747
 
7748
        # Proceed with discard a/b or change events until
7749
        # next match
7750
        while ( $ai < $ma || $bi < $mb )
7751
        {
7752
 
7753
            if ( $ai < $ma && $bi < $mb )
7754
            {
7755
 
7756
                # Change
7757
                if ( defined $changeCallback )
7758
                {
7759
                    &$changeCallback( $ai++, $bi++, @_ );
7760
                }
7761
                else
7762
                {
7763
                    &$discardACallback( $ai++, $bi, @_ );
7764
                    &$discardBCallback( $ai, $bi++, @_ );
7765
                }
7766
            }
7767
            elsif ( $ai < $ma )
7768
            {
7769
                &$discardACallback( $ai++, $bi, @_ );
7770
            }
7771
            else
7772
            {
7773
 
7774
                # $bi < $mb
7775
                &$discardBCallback( $ai, $bi++, @_ );
7776
            }
7777
        }
7778
 
7779
        # Match
7780
        &$matchCallback( $ai++, $bi++, @_ );
7781
    }
7782
 
7783
    while ( $ai <= $lastA || $bi <= $lastB )
7784
    {
7785
        if ( $ai <= $lastA && $bi <= $lastB )
7786
        {
7787
 
7788
            # Change
7789
            if ( defined $changeCallback )
7790
            {
7791
                &$changeCallback( $ai++, $bi++, @_ );
7792
            }
7793
            else
7794
            {
7795
                &$discardACallback( $ai++, $bi, @_ );
7796
                &$discardBCallback( $ai, $bi++, @_ );
7797
            }
7798
        }
7799
        elsif ( $ai <= $lastA )
7800
        {
7801
            &$discardACallback( $ai++, $bi, @_ );
7802
        }
7803
        else
7804
        {
7805
 
7806
            # $bi <= $lastB
7807
            &$discardBCallback( $ai, $bi++, @_ );
7808
        }
7809
    }
7810
 
7811
    return 1;
7812
}
7813
 
7814
sub prepare
7815
{
7816
    my $a       = shift;    # array ref
7817
    my $keyGen  = shift;    # code ref
7818
 
7819
    # set up code ref
7820
    $keyGen = sub { $_[0] } unless defined($keyGen);
7821
 
7822
    return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
7823
}
7824
 
7825
sub LCS
7826
{
7827
    my $a = shift;                  # array ref
7828
    my $b = shift;                  # array ref or hash ref
7829
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
7830
    my @retval;
7831
    my $i;
7832
    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
7833
    {
7834
        if ( defined( $matchVector->[$i] ) )
7835
        {
7836
            push ( @retval, $a->[$i] );
7837
        }
7838
    }
7839
    return wantarray ? @retval : \@retval;
7840
}
7841
 
7842
sub LCS_length
7843
{
7844
    my $a = shift;                          # array ref
7845
    my $b = shift;                          # array ref or hash ref
7846
    return _longestCommonSubsequence( $a, $b, 1, @_ );
7847
}
7848
 
7849
sub LCSidx
7850
{
7851
    my $a= shift @_;
7852
    my $b= shift @_;
7853
    my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
7854
    my @am= grep defined $match->[$_], 0..$#$match;
7855
    my @bm= @{$match}[@am];
7856
    return \@am, \@bm;
7857
}
7858
 
7859
sub compact_diff
7860
{
7861
    my $a= shift @_;
7862
    my $b= shift @_;
7863
    my( $am, $bm )= LCSidx( $a, $b, @_ );
7864
    my @cdiff;
7865
    my( $ai, $bi )= ( 0, 0 );
7866
    push @cdiff, $ai, $bi;
7867
    while( 1 ) {
7868
        while(  @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0]  ) {
7869
            shift @$am;
7870
            shift @$bm;
7871
            ++$ai, ++$bi;
7872
        }
7873
        push @cdiff, $ai, $bi;
7874
        last   if  ! @$am;
7875
        $ai = $am->[0];
7876
        $bi = $bm->[0];
7877
        push @cdiff, $ai, $bi;
7878
    }
7879
    push @cdiff, 0+@$a, 0+@$b
7880
        if  $ai < @$a || $bi < @$b;
7881
    return wantarray ? @cdiff : \@cdiff;
7882
}
7883
 
7884
sub diff
7885
{
7886
    my $a      = shift;    # array ref
7887
    my $b      = shift;    # array ref
7888
    my $retval = [];
7889
    my $hunk   = [];
7890
    my $discard = sub {
7891
        push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
7892
    };
7893
    my $add = sub {
7894
        push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
7895
    };
7896
    my $match = sub {
7897
        push @$retval, $hunk
7898
            if 0 < @$hunk;
7899
        $hunk = []
7900
    };
7901
    traverse_sequences( $a, $b,
7902
        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
7903
    &$match();
7904
    return wantarray ? @$retval : $retval;
7905
}
7906
 
7907
sub sdiff
7908
{
7909
    my $a      = shift;    # array ref
7910
    my $b      = shift;    # array ref
7911
    my $retval = [];
7912
    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
7913
    my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
7914
    my $change = sub {
7915
        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
7916
    };
7917
    my $match = sub {
7918
        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
7919
    };
7920
    traverse_balanced(
7921
        $a,
7922
        $b,
7923
        {
7924
            MATCH     => $match,
7925
            DISCARD_A => $discard,
7926
            DISCARD_B => $add,
7927
            CHANGE    => $change,
7928
        },
7929
        @_
7930
    );
7931
    return wantarray ? @$retval : $retval;
7932
}
7933
 
7934
########################################
7935
my $Root= __PACKAGE__;
7936
package Algorithm::Diff::_impl;
7937
use strict;
7938
 
7939
sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
7940
            # 1   # $me->[1]: Ref to first sequence
7941
            # 2   # $me->[2]: Ref to second sequence
7942
sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
7943
sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
7944
sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
7945
sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
7946
sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
7947
sub _Min() { -2 } # Added to _Off to get min instead of max+1
7948
 
7949
sub Die
7950
{
7951
    require Carp;
7952
    Carp::confess( @_ );
7953
}
7954
 
7955
sub _ChkPos
7956
{
7957
    my( $me )= @_;
7958
    return   if  $me->[_Pos];
7959
    my $meth= ( caller(1) )[3];
7960
    Die( "Called $meth on 'reset' object" );
7961
}
7962
 
7963
sub _ChkSeq
7964
{
7965
    my( $me, $seq )= @_;
7966
    return $seq + $me->[_Off]
7967
        if  1 == $seq  ||  2 == $seq;
7968
    my $meth= ( caller(1) )[3];
7969
    Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
7970
}
7971
 
7972
sub getObjPkg
7973
{
7974
    my( $us )= @_;
7975
    return ref $us   if  ref $us;
7976
    return $us . "::_obj";
7977
}
7978
 
7979
sub new
7980
{
7981
    my( $us, $seq1, $seq2, $opts ) = @_;
7982
    my @args;
7983
    for( $opts->{keyGen} ) {
7984
        push @args, $_   if  $_;
7985
    }
7986
    for( $opts->{keyGenArgs} ) {
7987
        push @args, @$_   if  $_;
7988
    }
7989
    my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
7990
    my $same= 1;
7991
    if(  0 == $cdif->[2]  &&  0 == $cdif->[3]  ) {
7992
        $same= 0;
7993
        splice @$cdif, 0, 2;
7994
    }
7995
    my @obj= ( $cdif, $seq1, $seq2 );
7996
    $obj[_End] = (1+@$cdif)/2;
7997
    $obj[_Same] = $same;
7998
    $obj[_Base] = 0;
7999
    my $me = bless \@obj, $us->getObjPkg();
8000
    $me->Reset( 0 );
8001
    return $me;
8002
}
8003
 
8004
sub Reset
8005
{
8006
    my( $me, $pos )= @_;
8007
    $pos= int( $pos || 0 );
8008
    $pos += $me->[_End]
8009
        if  $pos < 0;
8010
    $pos= 0
8011
        if  $pos < 0  ||  $me->[_End] <= $pos;
8012
    $me->[_Pos]= $pos || !1;
8013
    $me->[_Off]= 2*$pos - 1;
8014
    return $me;
8015
}
8016
 
8017
sub Base
8018
{
8019
    my( $me, $base )= @_;
8020
    my $oldBase= $me->[_Base];
8021
    $me->[_Base]= 0+$base   if  defined $base;
8022
    return $oldBase;
8023
}
8024
 
8025
sub Copy
8026
{
8027
    my( $me, $pos, $base )= @_;
8028
    my @obj= @$me;
8029
    my $you= bless \@obj, ref($me);
8030
    $you->Reset( $pos )   if  defined $pos;
8031
    $you->Base( $base );
8032
    return $you;
8033
}
8034
 
8035
sub Next {
8036
    my( $me, $steps )= @_;
8037
    $steps= 1   if  ! defined $steps;
8038
    if( $steps ) {
8039
        my $pos= $me->[_Pos];
8040
        my $new= $pos + $steps;
8041
        $new= 0   if  $pos  &&  $new < 0;
8042
        $me->Reset( $new )
8043
    }
8044
    return $me->[_Pos];
8045
}
8046
 
8047
sub Prev {
8048
    my( $me, $steps )= @_;
8049
    $steps= 1   if  ! defined $steps;
8050
    my $pos= $me->Next(-$steps);
8051
    $pos -= $me->[_End]   if  $pos;
8052
    return $pos;
8053
}
8054
 
8055
sub Diff {
8056
    my( $me )= @_;
8057
    $me->_ChkPos();
8058
    return 0   if  $me->[_Same] == ( 1 & $me->[_Pos] );
8059
    my $ret= 0;
8060
    my $off= $me->[_Off];
8061
    for my $seq ( 1, 2 ) {
8062
        $ret |= $seq
8063
            if  $me->[_Idx][ $off + $seq + _Min ]
8064
            <   $me->[_Idx][ $off + $seq ];
8065
    }
8066
    return $ret;
8067
}
8068
 
8069
sub Min {
8070
    my( $me, $seq, $base )= @_;
8071
    $me->_ChkPos();
8072
    my $off= $me->_ChkSeq($seq);
8073
    $base= $me->[_Base] if !defined $base;
8074
    return $base + $me->[_Idx][ $off + _Min ];
8075
}
8076
 
8077
sub Max {
8078
    my( $me, $seq, $base )= @_;
8079
    $me->_ChkPos();
8080
    my $off= $me->_ChkSeq($seq);
8081
    $base= $me->[_Base] if !defined $base;
8082
    return $base + $me->[_Idx][ $off ] -1;
8083
}
8084
 
8085
sub Range {
8086
    my( $me, $seq, $base )= @_;
8087
    $me->_ChkPos();
8088
    my $off = $me->_ChkSeq($seq);
8089
    if( !wantarray ) {
8090
        return  $me->[_Idx][ $off ]
8091
            -   $me->[_Idx][ $off + _Min ];
8092
    }
8093
    $base= $me->[_Base] if !defined $base;
8094
    return  ( $base + $me->[_Idx][ $off + _Min ] )
8095
        ..  ( $base + $me->[_Idx][ $off ] - 1 );
8096
}
8097
 
8098
sub Items {
8099
    my( $me, $seq )= @_;
8100
    $me->_ChkPos();
8101
    my $off = $me->_ChkSeq($seq);
8102
    if( !wantarray ) {
8103
        return  $me->[_Idx][ $off ]
8104
            -   $me->[_Idx][ $off + _Min ];
8105
    }
8106
    return
8107
        @{$me->[$seq]}[
8108
                $me->[_Idx][ $off + _Min ]
8109
            ..  ( $me->[_Idx][ $off ] - 1 )
8110
        ];
8111
}
8112
 
8113
sub Same {
8114
    my( $me )= @_;
8115
    $me->_ChkPos();
8116
    return wantarray ? () : 0
8117
        if  $me->[_Same] != ( 1 & $me->[_Pos] );
8118
    return $me->Items(1);
8119
}
8120
 
8121
my %getName;
8122
BEGIN {
8123
    %getName= (
8124
        same => \&Same,
8125
        diff => \&Diff,
8126
        base => \&Base,
8127
        min  => \&Min,
8128
        max  => \&Max,
8129
        range=> \&Range,
8130
        items=> \&Items, # same thing
8131
    );
8132
}
8133
 
8134
sub Get
8135
{
8136
    my $me= shift @_;
8137
    $me->_ChkPos();
8138
    my @value;
8139
    for my $arg (  @_  ) {
8140
        for my $word (  split ' ', $arg  ) {
8141
            my $meth;
8142
            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
8143
                ||  not  $meth= $getName{ lc $2 }
8144
            ) {
8145
                Die( $Root, ", Get: Invalid request ($word)" );
8146
            }
8147
            my( $base, $name, $seq )= ( $1, $2, $3 );
8148
            push @value, scalar(
8149
                4 == length($name)
8150
                    ? $meth->( $me )
8151
                    : $meth->( $me, $seq, $base )
8152
            );
8153
        }
8154
    }
8155
    if(  wantarray  ) {
8156
        return @value;
8157
    } elsif(  1 == @value  ) {
8158
        return $value[0];
8159
    }
8160
    Die( 0+@value, " values requested from ",
8161
        $Root, "'s Get in scalar context" );
8162
}
8163
 
8164
 
8165
my $Obj= getObjPkg($Root);
8166
no strict 'refs';
8167
 
8168
for my $meth (  qw( new getObjPkg )  ) {
8169
    *{$Root."::".$meth} = \&{$meth};
8170
    *{$Obj ."::".$meth} = \&{$meth};
8171
}
8172
for my $meth (  qw(
8173
    Next Prev Reset Copy Base Diff
8174
    Same Items Range Min Max Get
8175
    _ChkPos _ChkSeq
8176
)  ) {
8177
    *{$Obj."::".$meth} = \&{$meth};
8178
}
8179
 
8180
1;
8181
# This version released by Tye McQueen (http://perlmonks.org/?node=tye).
8182
# 
8183
# =head1 LICENSE
8184
# 
8185
# Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
8186
# Parts by Tye McQueen.
8187
# 
8188
# This program is free software; you can redistribute it and/or modify it
8189
# under the same terms as Perl.
8190
# 
8191
# =head1 MAILING LIST
8192
# 
8193
# Mark-Jason still maintains a mailing list.  To join a low-volume mailing
8194
# list for announcements related to diff and Algorithm::Diff, send an
8195
# empty mail message to mjd-perl-diff-request@plover.com.
8196
# =head1 CREDITS
8197
# 
8198
# Versions through 0.59 (and much of this documentation) were written by:
8199
# 
8200
# Mark-Jason Dominus, mjd-perl-diff@plover.com
8201
# 
8202
# This version borrows some documentation and routine names from
8203
# Mark-Jason's, but Diff.pm's code was completely replaced.
8204
# 
8205
# This code was adapted from the Smalltalk code of Mario Wolczko
8206
# <mario@wolczko.com>, which is available at
8207
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
8208
# 
8209
# C<sdiff> and C<traverse_balanced> were written by Mike Schilli
8210
# <m@perlmeister.com>.
8211
# 
8212
# The algorithm is that described in
8213
# I<A Fast Algorithm for Computing Longest Common Subsequences>,
8214
# CACM, vol.20, no.5, pp.350-353, May 1977, with a few
8215
# minor improvements to improve the speed.
8216
# 
8217
# Much work was done by Ned Konz (perl@bike-nomad.com).
8218
# 
8219
# The OO interface and some other changes are by Tye McQueen.
8220
# 
8221
EOAlgDiff
8222
# 2}}}
8223
    my $problems        = 0;
8224
    $HAVE_Algorith_Diff = 0;
8225
    my $dir             = "";
8226
    if ($opt_sdir) {
8227
        ++$TEMP_OFF;
8228
        $dir = "$opt_sdir/$TEMP_OFF";
8229
        File::Path::rmtree($dir) if     is_dir($dir);
8230
        File::Path::mkpath($dir) unless is_dir($dir);
8231
    } else {
8232
        # let File::Temp create a suitable temporary directory
8233
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
8234
        $TEMP_INST{ $dir } = "Algorithm::Diff";
8235
    }
8236
    print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
8237
    my $Algorithm_dir      = "$dir/Algorithm";
8238
    my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
8239
    mkdir $Algorithm_dir     ;
8240
    mkdir $Algorithm_Diff_dir;
8241
 
8242
    my $OUT = new IO::File "$dir/Algorithm/Diff.pm", "w";
8243
    if (defined $OUT) {
8244
        print $OUT $Algorithm_Diff_Contents;
8245
        $OUT->close;
8246
    } else {
8247
        warn "Failed to install Algorithm/Diff.pm\n";
8248
        $problems = 1;
8249
    }
8250
 
8251
    push @INC, $dir;  # between this & Regexp::Common only need to do once
8252
    eval "use Algorithm::Diff qw / sdiff /";
8253
    $HAVE_Algorith_Diff = 1 unless $problems;
8254
} # 1}}}
8255
sub call_regexp_common {                     # {{{1
8256
    my ($ra_lines, $language ) = @_;
8257
    print "-> call_regexp_common\n" if $opt_v > 2;
8258
 
8259
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
8260
 
8261
    my $all_lines = join("", @{$ra_lines});
8262
 
8263
    no strict 'vars';
8264
    # otherwise get:
8265
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
8266
    if ($all_lines =~ $RE{comment}{$language}) {
8267
        # Suppress "Use of uninitialized value in regexp compilation" that
8268
        # pops up when $1 is undefined--happens if there's a bug in the $RE
8269
        # This Pascal comment will trigger it:
8270
        #         (* This is { another } test. **)
8271
        # Curiously, testing for "defined $1" breaks the substitution.
8272
        no warnings; 
8273
        # remove   comments
8274
        $all_lines =~ s/$1//g;
8275
    }
8276
    # a bogus use of %RE to avoid:
8277
    # Name "main::RE" used only once: possible typo at cloc line xx.
8278
    print scalar keys %RE if $opt_v < -20;
8279
#?#print "$all_lines\n";
8280
    print "<- call_regexp_common\n" if $opt_v > 2;
8281
    return split("\n", $all_lines);
8282
} # 1}}}
8283
sub plural_form {                            # {{{1
8284
    # For getting the right plural form on some English nouns.
8285
    my $n = shift @_;
8286
    if ($n == 1) { return ( 1, "" ); }
8287
    else         { return ($n, "s"); }
8288
} # 1}}}
8289
sub matlab_or_objective_C {                  # {{{1
8290
    # Decide if code is MATLAB, Objective C, MUMPS, or Mercury
8291
    my ($file        , # in
8292
        $rh_Err      , # in   hash of error codes
8293
        $raa_errors  , # out
8294
        $rs_language , # out
8295
       ) = @_;
8296
    print "-> matlab_or_objective_C\n" if $opt_v > 2;
8297
    # matlab markers:
8298
    #   first line starts with "function"
8299
    #   some lines start with "%"
8300
    #   high marks for lines that start with [
8301
    #
8302
    # Objective C markers:
8303
    #   must have at least two brace characters, { }
8304
    #   has /* ... */ style comments
8305
    #   some lines start with @
8306
    #   some lines start with #include
8307
    #
8308
    # MUMPS:
8309
    #   has ; comment markers
8310
    #   do not match:  \w+\s*=\s*\w
8311
    #   lines begin with   \s*\.?\w+\s+\w
8312
    #   high marks for lines that start with \s*K\s+ or \s*Kill\s+
8313
    #
8314
    # Mercury:
8315
    #   any line that begins with :- immediately triggers this 
8316
 
8317
    ${$rs_language} = "";
8318
    my $IN = new IO::File $file, "r";
8319
    if (!defined $IN) {
8320
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8321
        return;
8322
    }
8323
 
8324
    my $DEBUG              = 0;
8325
 
8326
    my $matlab_points      = 0;
8327
    my $objective_C_points = 0;
8328
    my $mumps_points       = 0;
8329
    my $mercury_points     = 0;
8330
    my $has_braces         = 0;
8331
    while (<$IN>) {
8332
        ++$has_braces if $_ =~ m/[{}]/;
8333
#print "LINE $. has_braces=$has_braces\n";
8334
        ++$mumps_points if $. == 1 and m{^[A-Z]};
8335
        if      (m{^\s*/\*} or m {^\s*//}) {   #   /* or //
8336
            $objective_C_points += 5;
8337
            $matlab_points      -= 5;
8338
printf ".m:  /*|//  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8339
        } elsif (m{^:-\s+}) {      # gotta be mercury
8340
            $mercury_points = 1000;
8341
            last;
8342
        } elsif (m{\w+\s*=\s*\[}) {      # matrix assignment, very matlab
8343
            $matlab_points += 5;
8344
printf ".m:  \\w=[   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8345
        } elsif (m{^\s*\w+\s*=\s*}) {    # definitely not MUMPS
8346
            --$mumps_points;
8347
printf ".m:  \\w=    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8348
        } elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
8349
            ++$mumps_points;
8350
printf ".m:  \\w \\w  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8351
        } elsif (m{^\s*;}) {
8352
            ++$mumps_points;
8353
printf ".m:  ;      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8354
        } elsif (m{^\s*#(include|import)}) {
8355
            # Objective C without a doubt
8356
            $objective_C_points = 1000;
8357
            $matlab_points      = 0;
8358
printf ".m: #includ obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8359
            $has_braces         = 2;
8360
            last;
8361
        } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
8362
            # Objective C without a doubt
8363
            $objective_C_points = 1000;
8364
            $matlab_points      = 0;
8365
printf ".m: keyword obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8366
            last;
8367
        } elsif (m{^\s*\[}) {             #   line starts with [  -- very matlab
8368
            $matlab_points += 5;
8369
printf ".m:  [      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8370
        } elsif (m{^\sK(ill)?\s+}) {
8371
            $mumps_points  += 5;
8372
printf ".m:  Kill   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8373
        } elsif (m{^\s*function}) {
8374
            --$objective_C_points;
8375
            ++$matlab_points;
8376
printf ".m:  funct  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8377
        } elsif (m{^\s*%}) {              #   %
8378
            # matlab commented line
8379
            --$objective_C_points;
8380
            ++$matlab_points;
8381
printf ".m:  pcent  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8382
        }
8383
    }
8384
    $IN->close;
8385
printf "END LOOP    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8386
 
8387
    # next heuristic is unreliable for small files
8388
#   $objective_C_points = -9.9e20 unless $has_braces >= 2;
8389
 
8390
    my %points = ( 'MATLAB'      => $matlab_points     ,
8391
                   'MUMPS'       => $mumps_points      ,
8392
                   'Objective C' => $objective_C_points,
8393
                   'Mercury'     => $mercury_points    , );
8394
 
8395
    ${$rs_language} = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
8396
 
8397
    print "<- matlab_or_objective_C($file: matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
8398
        if $opt_v > 2;
8399
 
8400
} # 1}}}
8401
sub Lisp_or_OpenCL {                         # {{{1
8402
    my ($file        , # in
8403
        $rh_Err      , # in   hash of error codes
8404
        $raa_errors  , # out
8405
       ) = @_;
8406
 
8407
    print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
8408
 
8409
    my $lang = undef;
8410
    my $IN = new IO::File $file, "r";
8411
    if (!defined $IN) {
8412
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8413
        return $lang;
8414
    }
8415
    my $lisp_points   = 0;
8416
    my $opcl_points = 0;
8417
    while (<$IN>) {
8418
        ++$lisp_points if  /^\s*;/;
8419
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
8420
        ++$opcl_points if  /^\s*(int|float|const|{)/;
8421
    }
8422
    $IN->close;
8423
    # print "lisp_points=$lisp_points   opcl_points=$opcl_points\n";
8424
    if ($lisp_points > $opcl_points) {
8425
        $lang = "Lisp";
8426
    } else {
8427
        $lang = "OpenCL";
8428
    }
8429
 
8430
    print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
8431
    return $lang;
8432
} # 1}}}
8433
sub Lisp_or_Julia {                          # {{{1
8434
    my ($file        , # in
8435
        $rh_Err      , # in   hash of error codes
8436
        $raa_errors  , # out
8437
       ) = @_;
8438
 
8439
    print "-> Lisp_or_Julia\n" if $opt_v > 2;
8440
 
8441
    my $lang = undef;
8442
    my $IN = new IO::File $file, "r";
8443
    if (!defined $IN) {
8444
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8445
        return $lang;
8446
    }
8447
    my $lisp_points   = 0;
8448
    my $julia_points = 0;
8449
    while (<$IN>) {
8450
        ++$lisp_points if  /^\s*;/;
8451
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
8452
        ++$julia_points if  /^\s*(function|end|println|for|while)/;
8453
    }
8454
    $IN->close;
8455
    # print "lisp_points=$lisp_points   julia_points=$julia_points\n";
8456
    if ($lisp_points > $julia_points) {
8457
        $lang = "Lisp";
8458
    } else {
8459
        $lang = "Julia";
8460
    }
8461
 
8462
    print "<- Lisp_or_Julia\n" if $opt_v > 2;
8463
    return $lang;
8464
} # 1}}}
8465
sub Perl_or_Prolog {                         # {{{1
8466
    my ($file        , # in
8467
        $rh_Err      , # in   hash of error codes
8468
        $raa_errors  , # out
8469
       ) = @_;
8470
 
8471
    print "-> Perl_or_Prolog\n" if $opt_v > 2;
8472
 
8473
    my $lang = undef;
8474
    my $IN = new IO::File $file, "r";
8475
    if (!defined $IN) {
8476
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8477
        return $lang;
8478
    }
8479
    my $perl_points = 0;
8480
    my $prolog_points = 0;
8481
    while (<$IN>) {
8482
        ++$perl_points if  /;\s*$/;
8483
        ++$perl_points if  /({|})/;
8484
        ++$perl_points if  /^\s*sub\s+/;
8485
        ++$prolog_points if /\.\s*$/;
8486
        ++$prolog_points if /:-/;
8487
    }
8488
    $IN->close;
8489
    # print "perl_points=$perl_points   prolog_points=$prolog_points\n";
8490
    if ($perl_points > $prolog_points) {
8491
        $lang = "Perl";
8492
    } else {
8493
        $lang = "Prolog";
8494
    }
8495
 
8496
    print "<- Perl_or_Prolog\n" if $opt_v > 2;
8497
    return $lang;
8498
} # 1}}}
8499
sub IDL_or_QtProject {                         # {{{1
8500
    # also Prolog
8501
    my ($file        , # in
8502
        $rh_Err      , # in   hash of error codes
8503
        $raa_errors  , # out
8504
       ) = @_;
8505
 
8506
    print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
8507
 
8508
    my $lang = undef;
8509
    my $IN = new IO::File $file, "r";
8510
    if (!defined $IN) {
8511
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8512
        return $lang;
8513
    }
8514
    my $idl_points    = 0;
8515
    my $qtproj_points = 0;
8516
    my $prolog_points = 0;
8517
    while (<$IN>) {
8518
        ++$idl_points    if /^\s*;/;
8519
        ++$idl_points    if /plot\(/i;
8520
        ++$qtproj_points if /^\s*(qt|configs|sources)\s*\+?=/i;
8521
        ++$prolog_points if /\.\s*$/;
8522
        ++$prolog_points if /:-/;
8523
    }
8524
    $IN->close;
8525
    # print "idl_points=$idl_points   qtproj_points=$qtproj_points\n";
8526
 
8527
    if ($idl_points > $qtproj_points) {
8528
        $lang = "IDL";
8529
    } else {
8530
        $lang = "Qt Project";
8531
    }
8532
 
8533
    my %points = ( 'IDL'        => $idl_points     ,
8534
                   'Qt Project' => $qtproj_points  ,
8535
                   'Prolog'     => $prolog_points  , );
8536
 
8537
    $lang = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
8538
 
8539
    print "<- IDL_or_QtProject(idl_points=$idl_points, ",
8540
          "qtproj_points=$qtproj_points, prolog_points=$prolog_points)\n" 
8541
           if $opt_v > 2;
8542
    return $lang;
8543
} # 1}}}
8544
sub Ant_or_XML {                             # {{{1
8545
    my ($file        , # in
8546
        $rh_Err      , # in   hash of error codes
8547
        $raa_errors  , # out
8548
       ) = @_;
8549
 
8550
    print "-> Ant_or_XML($file)\n" if $opt_v > 2;
8551
 
8552
    my $lang = "XML";
8553
    my $IN = new IO::File $file, "r";
8554
    if (!defined $IN) {
8555
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8556
        return $lang;
8557
    }
8558
    my $Ant_points   = 0;
8559
    my $XML_points   = 1;
8560
    while (<$IN>) {
8561
        if (/^\s*<project\s+/) {
8562
            ++$Ant_points  ;
8563
            --$XML_points  ;
8564
        }
8565
        if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
8566
            ++$Ant_points  ;
8567
            --$XML_points  ;
8568
        }
8569
    }
8570
    $IN->close;
8571
 
8572
    if ($XML_points >= $Ant_points) {
8573
        # tie or better goes to XML
8574
        $lang = "XML";
8575
    } else {
8576
        $lang = "Ant";
8577
    }
8578
 
8579
    print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
8580
    return $lang;
8581
} # 1}}}
8582
sub Maven_or_XML {                           # {{{1
8583
    my ($file        , # in
8584
        $rh_Err      , # in   hash of error codes
8585
        $raa_errors  , # out
8586
       ) = @_;
8587
 
8588
    print "-> Maven_or_XML($file)\n" if $opt_v > 2;
8589
 
8590
    my $lang = "XML";
8591
    my $IN = new IO::File $file, "r";
8592
    if (!defined $IN) {
8593
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8594
        return $lang;
8595
    }
8596
    my $Mvn_points   = 0;
8597
    my $XML_points   = 1;
8598
    while (<$IN>) {
8599
        if (/^\s*<project\s+/) {
8600
            ++$Mvn_points  ;
8601
            --$XML_points  ;
8602
        }
8603
        if (m{xmlns="http://maven.apache.org/POM/}) {
8604
            ++$Mvn_points  ;
8605
            --$XML_points  ;
8606
        }
8607
    }
8608
    $IN->close;
8609
 
8610
    if ($XML_points >= $Mvn_points) {
8611
        # tie or better goes to XML
8612
        $lang = "XML";
8613
    } else {
8614
        $lang = "Maven";
8615
    }
8616
 
8617
    print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
8618
    return $lang;
8619
} # 1}}}
8620
sub pascal_or_puppet {                       # {{{1
8621
    # Decide if code is Pascal or Puppet manifest
8622
    my ($file        , # in
8623
        $rh_Err      , # in   hash of error codes
8624
        $raa_errors  , # out
8625
        $rs_language , # out
8626
       ) = @_;
8627
 
8628
    print "-> pascal_or_puppet\n" if $opt_v > 2;
8629
 
8630
    ${$rs_language} = "";
8631
    my $IN = new IO::File $file, "r";
8632
    if (!defined $IN) {
8633
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8634
        return;
8635
    }
8636
 
8637
    my $DEBUG              = 0;
8638
    my $pascal_points      = 0;
8639
    my $puppet_points      = 0;
8640
 
8641
    while (<$IN>) {
8642
        ++$pascal_points if /\bprogram\s+[A-Za-z]/i;
8643
        ++$pascal_points if /\bunit\s+[A-Za-z]/i;
8644
        ++$pascal_points if /\bmodule\s+[A-Za-z]/i;
8645
        ++$pascal_points if /\bprocedure\b/i;
8646
        ++$pascal_points if /\bfunction\b/i;
8647
        ++$pascal_points if /^\s*interface\s+/i;
8648
        ++$pascal_points if /^\s*implementation\s+/i;
8649
        ++$pascal_points if /\bbegin\b/i;
8650
        ++$pascal_points if /\bend\b/i;
8651
 
8652
        ++$puppet_points if /^\s*class\s+/;
8653
        ++$puppet_points if /^\s*case\s+/;
8654
        ++$puppet_points if /^\s*package\s+/;
8655
        ++$puppet_points if /^\s*file\s+/;
8656
        ++$puppet_points if /^\s*service\s+/;
8657
    }
8658
    $IN->close;
8659
 
8660
    print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
8661
        if $opt_v > 2;
8662
 
8663
    if ($pascal_points > $puppet_points) {
8664
        ${$rs_language} = "Pascal";
8665
    } else {
8666
        ${$rs_language} = "Puppet";
8667
    }
8668
 
8669
} # 1}}}
8670
sub html_colored_text {                      # {{{1
8671
    # http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
8672
    my ($color, $text) = @_;
8673
#?#die "html_colored_text($text)";
8674
    if      ($color =~ /^red$/i)   {
8675
        $color = "#ff0000";
8676
    } elsif ($color =~ /^green$/i) {
8677
        $color = "#00ff00";
8678
    } elsif ($color =~ /^blue$/i)  {
8679
        $color = "#0000ff";
8680
    } elsif ($color =~ /^grey$/i)  {
8681
        $color = "#cccccc";
8682
    }
8683
#   return "" unless $text;
8684
    return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
8685
} # 1}}}
8686
sub xml_metachars {                          # {{{1
8687
    # http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
8688
    my ($string, ) = shift @_;
8689
 
8690
    my  @in_chars    = split(//, $string);
8691
    my  @out_chars   = ();
8692
    foreach my $c (@in_chars) {
8693
        if      ($c eq '&') { push @out_chars, '&amp;'
8694
        } elsif ($c eq '<') { push @out_chars, '&lt;'
8695
        } elsif ($c eq '>') { push @out_chars, '&gt;'
8696
        } elsif ($c eq '"') { push @out_chars, '&quot;'
8697
        } elsif ($c eq "'") { push @out_chars, '&apos;'
8698
        } else {
8699
            push @out_chars, $c;
8700
        }
8701
    }
8702
    return join "", @out_chars; 
8703
} # 1}}}
8704
sub html_metachars {                         # {{{1
8705
    # Replace HTML metacharacters with their printable forms.
8706
    # Future:  use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
8707
    # from Fabiano Reese Righetti's HTML::Encoder module if 
8708
    # this subroutine proves to be too simplistic.
8709
    my ($string, ) = shift @_;
8710
 
8711
    my  @in_chars    = split(//, $string);
8712
    my  @out_chars   = ();
8713
    foreach my $c (@in_chars) {
8714
        if      ($c eq '<') {
8715
            push @out_chars, '&lt;'
8716
        } elsif ($c eq '>') {
8717
            push @out_chars, '&gt;'
8718
        } elsif ($c eq '&') {
8719
            push @out_chars, '&amp;'
8720
        } else {
8721
            push @out_chars, $c;
8722
        }
8723
    }
8724
    return join "", @out_chars; 
8725
} # 1}}}
8726
sub test_alg_diff {                          # {{{1
8727
    my ($file_1 ,
8728
        $file_2 )
8729
       = @_;
8730
    my $fh_1 = new IO::File $file_1, "r";
8731
    die "Unable to read $file_1:  $!\n" unless defined $fh_1;
8732
    chomp(my @lines_1 = <$fh_1>);
8733
    $fh_1->close;
8734
 
8735
    my $fh_2 = new IO::File $file_2, "r";
8736
    die "Unable to read $file_2:  $!\n" unless defined $fh_2;
8737
    chomp(my @lines_2 = <$fh_2>);
8738
    $fh_2->close;
8739
 
8740
    my $n_no_change = 0;
8741
    my $n_modified  = 0;
8742
    my $n_added     = 0;
8743
    my $n_deleted   = 0;
8744
    my @min_sdiff   = ();
8745
my $NN = chr(27) . "[0m";  # normal
8746
my $BB = chr(27) . "[1m";  # bold
8747
 
8748
    my @sdiffs = sdiff( \@lines_1, \@lines_2 );
8749
    foreach my $entry (@sdiffs) {
8750
        my ($out_1, $out_2) = ('', '');
8751
        if ($entry->[0] eq 'u') { 
8752
            ++$n_no_change; 
8753
          # $out_1 = $entry->[1];
8754
          # $out_2 = $entry->[2];
8755
            next; 
8756
        }
8757
#       push @min_sdiff, $entry;
8758
        if      ($entry->[0] eq 'c') { 
8759
            ++$n_modified;
8760
            ($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
8761
            $out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
8762
            $out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
8763
          # $out_1 =~ s/\cA//g;
8764
          # $out_2 =~ s/\cA//g;
8765
        } elsif ($entry->[0] eq '+') { 
8766
            ++$n_added;
8767
            $out_1 = $entry->[1];
8768
            $out_2 = $entry->[2];
8769
        } elsif ($entry->[0] eq '-') { 
8770
            ++$n_deleted;
8771
            $out_1 = $entry->[1];
8772
            $out_2 = $entry->[2];
8773
        } elsif ($entry->[0] eq 'u') { 
8774
        } else { die "unknown entry->[0]=[$entry->[0]]\n"; }
8775
        printf "%-80s | %s\n", $out_1, $out_2;
8776
    }
8777
 
8778
#   foreach my $entry (@min_sdiff) {
8779
#       printf "DIFF:  %s  %s  %s\n", @{$entry};
8780
#   }
8781
} # 1}}}
8782
sub write_comments_to_html {                 # {{{1
8783
    my ($filename      , # in
8784
        $rah_diff_L    , # in  see routine array_diff() for explanation
8785
        $rah_diff_R    , # in  see routine array_diff() for explanation
8786
        $rh_blank      , # in  location and counts of blank lines
8787
       ) = @_;
8788
 
8789
    print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
8790
    my $file = $filename . ".html";
8791
#use Data::Dumper;
8792
#print Dumper("rah_diff_L", $rah_diff_L, "rah_diff_R", $rah_diff_R);
8793
    my $OUT = new IO::File $file, "w";
8794
    if (!defined $OUT) {
8795
        warn "Unable to write to $file\n";
8796
        print "<- write_comments_to_html\n" if $opt_v > 2;
8797
        return;
8798
    }
8799
 
8800
    my $approx_line_count = scalar @{$rah_diff_L};
8801
    my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
8802
 
8803
    my $html_out = html_header($filename);
8804
 
8805
    my $comment_line_number = 0;
8806
    for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
8807
        if (defined $rh_blank->{$i}) {
8808
            foreach (1..$rh_blank->{$i}) {
8809
                $html_out .= "<!-- blank -->\n";
8810
            }
8811
        }
8812
        my $line_num = "";
8813
        my $pre      = "";
8814
        my $post     = '</span> &nbsp;';
8815
warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
8816
        if ($rah_diff_R->[$i]{type} eq 'nonexist') {
8817
            ++$comment_line_number;
8818
            $line_num = sprintf "\&nbsp; <span class=\"clinenum\"> %0${n_digits}d %s",
8819
                            $comment_line_number, $post;
8820
            $pre = '<span class="comment">';
8821
            $html_out .= $line_num;  
8822
            $html_out .= $pre .  
8823
                         html_metachars($rah_diff_L->[$i]{char}) . 
8824
                         $post . "\n";
8825
            next;
8826
        }
8827
        if      ($rah_diff_R->[$i]{type} eq 'code' and
8828
                 $rah_diff_R->[$i]{desc} eq 'same') {
8829
            # entire line remains as-is
8830
            $line_num = sprintf "\&nbsp; <span class=\"linenum\"> %0${n_digits}d %s",
8831
                            $rah_diff_R->[$i]{lnum}, $post;
8832
            $pre    = '<span class="normal">';
8833
            $html_out .= $line_num;  
8834
            $html_out .= $pre . 
8835
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
8836
#XX     } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
8837
#XX
8838
#XX         $line_num = '<span class="linenum">' .
8839
#XX                      $rah_diff_R->[$i]{lnum} . $post;
8840
#XX         $html_out .= $line_num;  
8841
#XX
8842
#XX         my @strings = @{$rah_diff_R->[$i]{char}{strings}}; 
8843
#XX         my @type    = @{$rah_diff_R->[$i]{char}{type}}; 
8844
#XX         for (my $i = 0; $i < scalar @strings; $i++) {
8845
#XX             if ($type[$i] eq 'u') {
8846
#XX                 $pre = '<span class="normal">';
8847
#XX             } else {
8848
#XX                 $pre = '<span class="comment">';
8849
#XX             }
8850
#XX             $html_out .= $pre .  html_metachars($strings[$i]) . $post;
8851
#XX         }
8852
# print Dumper(@strings, @type); die;
8853
 
8854
        } elsif ($rah_diff_R->[$i]{type} eq 'comment') {
8855
            $line_num = '<span class="clinenum">' . $comment_line_number . $post;
8856
            # entire line is a comment
8857
            $pre    = '<span class="comment">';
8858
            $html_out .= $pre .
8859
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
8860
        }
8861
#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
8862
        $html_out .= "\n";
8863
    }
8864
 
8865
    $html_out .= html_end();
8866
 
8867
    my $out_file = "$filename.html";
8868
    open  OUT, ">$out_file" or die "Cannot write to $out_file $!\n";
8869
    print OUT $html_out;
8870
    close OUT;
8871
    print "Wrote $out_file\n" unless $opt_quiet;
8872
    $OUT->close;
8873
 
8874
    print "<- write_comments_to_html\n" if $opt_v > 2;
8875
} # 1}}}
8876
sub array_diff {                             # {{{1
8877
    my ($file          , # in  only used for error reporting
8878
        $ra_lines_L    , # in  array of lines in Left  file (no blank lines)
8879
        $ra_lines_R    , # in  array of lines in Right file (no blank lines)
8880
        $mode          , # in  "comment" | "revision"
8881
        $rah_diff_L    , # out
8882
        $rah_diff_R    , # out
8883
        $raa_Errors    , # in/out
8884
       ) = @_;
8885
 
8886
    # This routine operates in two ways:
8887
    # A. Computes diffs of the same file with and without comments.
8888
    #    This is used to classify lines as code, comments, or blank.
8889
    # B. Computes diffs of two revisions of a file.  This method
8890
    #    requires a prior run of method A using the older version
8891
    #    of the file because it needs lines to be classified.
8892
 
8893
    # $rah_diff structure:
8894
    # An array with n entries where n equals the number of lines in 
8895
    # an sdiff of the two files.  Each entry in the array describes
8896
    # the contents of the corresponding line in file Left and file Right:
8897
    #  diff[]{type} = blank | code | code+comment | comment | nonexist
8898
    #        {lnum} = line number within the original file (1-based)
8899
    #        {desc} = same | added | removed | modified
8900
    #        {char} = the input line unless {desc} = 'modified' in
8901
    #                 which case
8902
    #        {char}{strings} = [ substrings ]
8903
    #        {char}{type}    = [ disposition (added, removed, etc)]
8904
    #
8905
 
8906
    @{$rah_diff_L} = ();
8907
    @{$rah_diff_R} = ();
8908
 
8909
    print "-> array_diff()\n" if $opt_v > 2;
8910
    my $COMMENT_MODE = 0;
8911
       $COMMENT_MODE = 1 if $mode eq "comment";
8912
 
8913
#print "array_diff(mode=$mode)\n";
8914
#print Dumper("block left:" , $ra_lines_L);
8915
#print Dumper("block right:", $ra_lines_R);
8916
 
8917
    my @sdiffs = ();
8918
    eval {
8919
        local $SIG{ALRM} = sub { die "alarm\n" };
8920
        alarm $opt_diff_timeout;
8921
        @sdiffs = sdiff($ra_lines_L, $ra_lines_R);
8922
        alarm 0;
8923
    };
8924
    if ($@) {
8925
        # timed out
8926
        die unless $@ eq "alarm\n"; # propagate unexpected errors
8927
        push @{$raa_Errors}, 
8928
             [ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
8929
        if ($opt_v) {
8930
          warn "array_diff: diff timeout failure for $file--ignoring\n";
8931
        }
8932
        return;
8933
    }
8934
 
8935
#use Data::Dumper::Simple;
8936
#print Dumper($ra_lines_L, $ra_lines_R, @sdiffs);
8937
#die;
8938
 
8939
    my $n_L        = 0;
8940
    my $n_R        = 0;
8941
    my $n_sdiff    = 0;  # index to $rah_diff_L, $rah_diff_R
8942
    foreach my $triple (@sdiffs) {
8943
        my $flag   = $triple->[0];
8944
        my $line_L = $triple->[1];
8945
        my $line_R = $triple->[2];
8946
        $rah_diff_L->[$n_sdiff]{char} = $line_L;
8947
        $rah_diff_R->[$n_sdiff]{char} = $line_R;
8948
        if      ($flag eq 'u') {  # u = unchanged
8949
            ++$n_L;
8950
            ++$n_R;
8951
            if ($COMMENT_MODE) {
8952
                # line exists in both with & without comments, must be code
8953
                $rah_diff_L->[$n_sdiff]{type} = "code";
8954
                $rah_diff_R->[$n_sdiff]{type} = "code";
8955
            }
8956
            $rah_diff_L->[$n_sdiff]{desc} = "same";
8957
            $rah_diff_R->[$n_sdiff]{desc} = "same";
8958
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
8959
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
8960
        } elsif ($flag eq 'c') {  # c = changed
8961
# warn "per line sdiff() commented out\n"; if (0) {
8962
            ++$n_L;
8963
            ++$n_R;
8964
 
8965
            if ($COMMENT_MODE) {
8966
                # line has text both with & without comments;
8967
                # count as code
8968
                $rah_diff_L->[$n_sdiff]{type} = "code";
8969
                $rah_diff_R->[$n_sdiff]{type} = "code";
8970
            }
8971
 
8972
            my @chars_L = split '', $line_L;
8973
            my @chars_R = split '', $line_R;
8974
 
8975
#XX         my @inline_sdiffs = sdiff( \@chars_L, \@chars_R );
8976
 
8977
#use Data::Dumper::Simple; 
8978
#if ($n_R == 6 or $n_R == 1 or $n_R == 2) {
8979
#print "L=[$line_L]\n";
8980
#print "R=[$line_R]\n";
8981
#print Dumper(@chars_L, @chars_R, @inline_sdiffs);
8982
#}
8983
#XX         my @index = ();
8984
#XX         foreach my $il_triple (@inline_sdiffs) {
8985
#XX             # make an array of u|c|+|- corresponding
8986
#XX             # to each character
8987
#XX             push @index, $il_triple->[0];
8988
#XX         }
8989
#XX#print Dumper(@index); die;
8990
#XX          # expect problems if arrays @index and $inline_sdiffs[1];
8991
#XX          # (@{$inline_sdiffs->[1]} are the characters of line_L)
8992
#XX          # aren't the same length
8993
#XX          my $prev_type = $index[0];
8994
#XX          my @strings   = ();  # blocks of consecutive code or comment
8995
#XX          my @type      = ();  # u (=code) or c (=comment)
8996
#XX          my $j_str     = 0;
8997
#XX          $strings[$j_str] .= $chars_L[0];
8998
#XX          $type[$j_str] = $prev_type;
8999
#XX          for (my $i = 1; $i < scalar @chars_L; $i++) {
9000
#XX              if ($index[$i] ne $prev_type) {
9001
#XX                  ++$j_str;
9002
#XX#print "change at j_str=$j_str type=$index[$i]\n";
9003
#XX                  $type[$j_str] = $index[$i];
9004
#XX                  $prev_type    = $index[$i];
9005
#XX              }
9006
#XX              $strings[$j_str] .= $chars_L[$i];
9007
#XX          }
9008
# print Dumper(@strings, @type); die;
9009
#XX         delete $rah_diff_R->[$n_sdiff]{char};
9010
#XX         @{$rah_diff_R->[$n_sdiff]{char}{strings}} = @strings;
9011
#XX         @{$rah_diff_R->[$n_sdiff]{char}{type}}    = @type;
9012
            $rah_diff_L->[$n_sdiff]{desc} = "modified";
9013
            $rah_diff_R->[$n_sdiff]{desc} = "modified";
9014
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
9015
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
9016
#}
9017
 
9018
        } elsif ($flag eq '+') {  # + = added
9019
            ++$n_R;
9020
            if ($COMMENT_MODE) {
9021
                # should never get here
9022
                @{$rah_diff_L} = ();
9023
                @{$rah_diff_R} = ();
9024
                push @{$raa_Errors}, 
9025
                     [ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
9026
                if ($opt_v) {
9027
                  warn "array_diff: diff failure (diff says the\n";
9028
                  warn "comment-free file has added lines).\n";
9029
                  warn "$n_sdiff  $line_L\n";
9030
                }
9031
                last;
9032
            }
9033
            $rah_diff_L->[$n_sdiff]{type} = "nonexist";
9034
            $rah_diff_L->[$n_sdiff]{desc} = "removed";
9035
            $rah_diff_R->[$n_sdiff]{desc} = "added";
9036
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
9037
        } elsif ($flag eq '-') {  # - = removed
9038
            ++$n_L;
9039
            if ($COMMENT_MODE) {
9040
                # line must be comment because blanks already gone
9041
                $rah_diff_L->[$n_sdiff]{type} = "comment";
9042
            }
9043
            $rah_diff_R->[$n_sdiff]{type} = "nonexist";
9044
            $rah_diff_R->[$n_sdiff]{desc} = "removed";
9045
            $rah_diff_L->[$n_sdiff]{desc} = "added";
9046
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
9047
        }
9048
#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
9049
        ++$n_sdiff;
9050
    }
9051
#use Data::Dumper::Simple;
9052
#print Dumper($rah_diff_L, $rah_diff_R);
9053
 
9054
    print "<- array_diff\n" if $opt_v > 2;
9055
} # 1}}}
9056
sub remove_leading_dir {                     # {{{1 
9057
    my @filenames = @_;
9058
    #
9059
    #  Input should be a list of file names
9060
    #  with the same leading directory such as
9061
    # 
9062
    #      dir1/dir2/a.txt
9063
    #      dir1/dir2/b.txt
9064
    #      dir1/dir2/dir3/c.txt
9065
    #
9066
    #  Output is the same list minus the common
9067
    #  directory path:
9068
    # 
9069
    #      a.txt
9070
    #      b.txt
9071
    #      dir3/c.txt
9072
    #
9073
    print "-> remove_leading_dir()\n" if $opt_v > 2;
9074
    my @D = (); # a matrix:   [ [ dir1, dir2 ],         # dir1/dir2/a.txt
9075
                #               [ dir1, dir2 ],         # dir1/dir2/b.txt
9076
                #               [ dir1, dir2 , dir3] ]  # dir1/dir2/dir3/c.txt
9077
    if ($ON_WINDOWS) {
9078
        foreach my $F (@filenames) {
9079
            $F =~ s{\\}{/}g;
9080
            $F = ucfirst($F) if $F =~ /^\w:/;  # uppercase drive letter
9081
        }
9082
    }
9083
    if (scalar @filenames == 1) {
9084
        # special case:  with only one filename
9085
        # cannot determine a baseline, just remove first directory level
9086
        $filenames[0] =~ s{^.*?/}{};
9087
        print "-> $filenames[0]\n";
9088
        return $filenames[0];
9089
    }
9090
    foreach my $F (@filenames) {
9091
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
9092
        my @x = File::Spec->splitdir( $Dir );
9093
        pop @x unless $x[$#x]; # last entry usually null, remove it
9094
        if ($ON_WINDOWS) {
9095
            if (defined($Vol) and $Vol) {
9096
                # put the drive letter, eg, C:, at the front
9097
                unshift @x, uc $Vol;
9098
            }
9099
        }
9100
#print "F=$F, Dir=$Dir  x=[", join("][", @x), "]\n";
9101
        push @D, [ @x ];
9102
    }
9103
 
9104
    # now loop over columns until either they are all
9105
    # eliminated or a unique column is found
9106
 
9107
#use Data::Dumper::Simple;
9108
#print Dumper("remove_leading_dir after ", @D);
9109
 
9110
    my @common   = ();  # to contain the common leading directories
9111
    my $mismatch = 0;
9112
    while (!$mismatch) {
9113
        for (my $row = 1; $row < scalar @D; $row++) {
9114
#print "comparing $D[$row][0] to $D[0][0]\n";
9115
 
9116
            if (!defined $D[$row][0] or !defined $D[0][0] or
9117
                ($D[$row][0] ne $D[0][0])) {
9118
                $mismatch = 1;
9119
                last;
9120
            }
9121
        }
9122
#print "mismatch=$mismatch\n";
9123
        if (!$mismatch) {
9124
            push @common, $D[0][0];
9125
            # all terms in the leading match; unshift the batch
9126
            foreach my $ra (@D) {
9127
                shift @{$ra};
9128
            }
9129
        }
9130
    }
9131
 
9132
    push @common, " ";  # so that $leading will end with "/ "
9133
    my $leading = File::Spec->catdir( @common );
9134
       $leading =~ s{ $}{};  # now take back the bogus appended space
9135
#print "remove_leading_dir leading=[$leading]\n"; die;
9136
    if ($ON_WINDOWS) {
9137
       $leading =~ s{\\}{/}g;
9138
    }
9139
    foreach my $F (@filenames) {
9140
        $F =~ s{^$leading}{};
9141
    }
9142
 
9143
    print "<- remove_leading_dir()\n" if $opt_v > 2;
9144
    return @filenames;
9145
 
9146
} # 1}}}
9147
sub strip_leading_dir {                      # {{{1 
9148
    my ($leading, @filenames) = @_;
9149
    #  removes the string $leading from each entry in @filenames
9150
    print "-> strip_leading_dir()\n" if $opt_v > 2;
9151
 
9152
#print "remove_leading_dir leading=[$leading]\n"; die;
9153
    if ($ON_WINDOWS) {
9154
       $leading =~ s{\\}{/}g;
9155
        foreach my $F (@filenames) {
9156
            $F =~ s{\\}{/}g;
9157
        }
9158
    }
9159
    foreach my $F (@filenames) {
9160
        $F =~ s{^$leading}{};
9161
    }
9162
 
9163
    print "<- strip_leading_dir()\n" if $opt_v > 2;
9164
    return @filenames;
9165
 
9166
} # 1}}}
9167
sub find_deepest_file {                      # {{{1 
9168
    my @filenames = @_;
9169
    #
9170
    #  Input should be a list of file names
9171
    #  with the same leading directory such as
9172
    # 
9173
    #      dir1/dir2/a.txt
9174
    #      dir1/dir2/b.txt
9175
    #      dir1/dir2/dir3/c.txt
9176
    #
9177
    #  Output is the file with the most parent directories:
9178
    # 
9179
    #      dir1/dir2/dir3/c.txt
9180
 
9181
    print "-> find_deepest_file()\n" if $opt_v > 2;
9182
 
9183
    my $deepest    = undef;
9184
    my $max_subdir = -1;
9185
    foreach my $F (sort @filenames) {
9186
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
9187
        my @x = File::Spec->splitdir( $Dir );
9188
        pop @x unless $x[$#x]; # last entry usually null, remove it
9189
        if (scalar @x > $max_subdir) {
9190
            $deepest    = $F;
9191
            $max_subdir = scalar @x;
9192
        }
9193
    }
9194
 
9195
    print "<- find_deepest_file()\n" if $opt_v > 2;
9196
    return $deepest;
9197
 
9198
} # 1}}}
9199
sub find_uncommon_parent_dir {               # {{{1
9200
    my ($file_L, $file_R) = @_;
9201
    #
9202
    # example:
9203
    #
9204
    #   file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
9205
    #   file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
9206
    #
9207
    # then return
9208
    #
9209
    #   "perl-5.16.1",
9210
    #   "/tmp/8VxQG0OLbp/perl-5.16.3",
9211
 
9212
    my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
9213
    my @x_L = File::Spec->splitdir( $Dir_L );
9214
    my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
9215
    my @x_R = File::Spec->splitdir( $Dir_R );
9216
 
9217
    my @common  = ();
9218
 
9219
    # work backwards
9220
    while ($x_L[$#x_L] eq $x_R[$#x_R]) {
9221
        push @common, $x_L[$#x_L];
9222
        pop  @x_L;
9223
        pop  @x_R;
9224
    }
9225
    my $success = scalar @common;
9226
 
9227
    my $dirs_L = File::Spec->catdir( @x_L );
9228
    my $dirs_R = File::Spec->catdir( @x_R );
9229
    my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
9230
    my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
9231
 
9232
    return $lead_L, $lead_R, $success;
9233
 
9234
} # 1}}}
9235
sub get_leading_dirs {                       # {{{1
9236
    my ($rh_file_list_L, $rh_file_list_R) = @_;
9237
    # find uniquely named files in both sets to help determine the 
9238
    # leading directory positions
9239
    my %unique_filename = ();
9240
    my %basename_L = ();
9241
    my %basename_R = ();
9242
    foreach my $f (keys %{$rh_file_list_L}) {
9243
        my $bn = basename($f);
9244
        $basename_L{ $bn }{'count'}   += 1; 
9245
        $basename_L{ $bn }{'fullpath'} = $f; 
9246
    }
9247
    foreach my $f (keys %{$rh_file_list_R}) {
9248
        my $bn = basename($f);
9249
        $basename_R{ $bn }{'count'}   += 1;
9250
        $basename_R{ $bn }{'fullpath'} = $f; 
9251
    }
9252
    foreach my $f (keys %basename_L) {
9253
        next unless $basename_L{$f}{'count'} == 1;
9254
        next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
9255
        $unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
9256
        $unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
9257
    }
9258
    return undef, undef, 0 unless %unique_filename;
9259
 
9260
    my %candidate_leading_dir_L = ();
9261
    my %candidate_leading_dir_R = ();
9262
    foreach my $f (keys %unique_filename) {
9263
        my $fL = $unique_filename{ $f }{'L'};
9264
        my $fR = $unique_filename{ $f }{'R'};
9265
#printf "%-36s -> %-36s\n", $fL, $fR;
9266
        my $ptr_L = length($fL) - 1;
9267
        my $ptr_R = length($fR) - 1;
9268
        my @aL    = split '', $fL;
9269
        my @aR    = split '', $fR;
9270
        while ($ptr_L >= 0 and $ptr_R >= 0) {
9271
            last if $aL[$ptr_L] ne $aR[$ptr_R];
9272
            --$ptr_L;
9273
            --$ptr_R;
9274
        }
9275
#print "ptr_L=$ptr_L   ptr_R=$ptr_R\n";
9276
        my $leading_dir_L = "";
9277
           $leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
9278
        my $leading_dir_R = "";
9279
           $leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
9280
#print "leading_dir_L=$leading_dir_L   leading_dir_R=$leading_dir_R\n";
9281
        ++$candidate_leading_dir_L{$leading_dir_L};
9282
        ++$candidate_leading_dir_R{$leading_dir_R};
9283
    }
9284
#use Data::Dumper::Simple;
9285
#print Dumper(%candidate_leading_dir_L);
9286
#print Dumper(%candidate_leading_dir_R);
9287
#die;
9288
    my $best_L = (sort {
9289
               $candidate_leading_dir_L{$b} <=> 
9290
               $candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
9291
    my $best_R = (sort {
9292
               $candidate_leading_dir_R{$b} <=> 
9293
               $candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
9294
    return $best_L, $best_R, 1;
9295
} # 1}}}
9296
sub align_by_pairs {                         # {{{1 
9297
    my ($rh_file_list_L        , # in
9298
        $rh_file_list_R        , # in
9299
        $ra_added              , # out
9300
        $ra_removed            , # out
9301
        $ra_compare_list       , # out
9302
        ) = @_;
9303
    print "-> align_by_pairs()\n" if $opt_v > 2;
9304
    @{$ra_compare_list} = ();
9305
 
9306
    my @files_L = sort keys %{$rh_file_list_L};
9307
    my @files_R = sort keys %{$rh_file_list_R};
9308
    return () unless @files_L or  @files_R;  # at least one must have stuff
9309
    if      ( @files_L and !@files_R) {
9310
        # left side has stuff, right side is empty; everything deleted
9311
        @{$ra_added   }     = ();
9312
        @{$ra_removed }     = @files_L;
9313
        @{$ra_compare_list} = ();
9314
        return;
9315
    } elsif (!@files_L and  @files_R) {
9316
        # left side is empty, right side has stuff; everything added
9317
        @{$ra_added   }     = @files_R;
9318
        @{$ra_removed }     = ();
9319
        @{$ra_compare_list} = ();
9320
        return;
9321
    }
9322
#use Data::Dumper::Simple;
9323
#print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
9324
#die;
9325
    if (scalar @files_L == 1 and scalar @files_R == 1) {
9326
        # The easy case:  compare two files.
9327
        push @{$ra_compare_list}, [ $files_L[0],  $files_R[0] ]; 
9328
        @{$ra_added  } = ();
9329
        @{$ra_removed} = ();
9330
        return;
9331
    }
9332
    # The harder case:  compare groups of files.  This only works
9333
    # if the groups are in different directories so the first step
9334
    # is to strip the leading directory names from file lists to
9335
    # make it possible to align by file names.
9336
    my @files_L_minus_dir = undef;
9337
    my @files_R_minus_dir = undef;
9338
 
9339
    my $deepest_file_L    = find_deepest_file(@files_L);
9340
    my $deepest_file_R    = find_deepest_file(@files_R);
9341
#print "deepest L = [$deepest_file_L]\n";
9342
#print "deepest R = [$deepest_file_R]\n";
9343
####my ($leading_dir_L, $leading_dir_R, $success) = 
9344
####    find_uncommon_parent_dir($deepest_file_L, $deepest_file_R);
9345
    my ($leading_dir_L, $leading_dir_R, $success) = 
9346
                get_leading_dirs($rh_file_list_L, $rh_file_list_R);
9347
#print "leading_dir_L=[$leading_dir_L]\n";
9348
#print "leading_dir_R=[$leading_dir_R]\n";
9349
#print "success      =[$success]\n";
9350
    if ($success) {
9351
        @files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
9352
        @files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
9353
    } else {
9354
        # otherwise fall back to old strategy
9355
        @files_L_minus_dir = remove_leading_dir(@files_L);
9356
        @files_R_minus_dir = remove_leading_dir(@files_R);
9357
    }
9358
 
9359
    # Keys of the stripped_X arrays are canonical file names;
9360
    # should overlap mostly.  Keys in stripped_L but not in
9361
    # stripped_R are files that have been deleted.  Keys in
9362
    # stripped_R but not in stripped_L have been added.
9363
    my %stripped_L = ();
9364
       @stripped_L{ @files_L_minus_dir } = @files_L;
9365
    my %stripped_R = ();
9366
       @stripped_R{ @files_R_minus_dir } = @files_R;
9367
 
9368
    my %common = ();
9369
    foreach my $f (keys %stripped_L) {
9370
        $common{$f}  = 1 if     defined $stripped_R{$f};
9371
    }
9372
 
9373
    my %deleted = ();
9374
    foreach my $f (keys %stripped_L) {
9375
        $deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
9376
    }
9377
 
9378
    my %added = ();
9379
    foreach my $f (keys %stripped_R) {
9380
        $added{$stripped_R{$f}}   = $f unless defined $stripped_L{$f};
9381
    }
9382
 
9383
#use Data::Dumper::Simple;
9384
#print Dumper("align_by_pairs", %stripped_L, %stripped_R);
9385
#print Dumper("align_by_pairs", %common, %added, %deleted);
9386
 
9387
    foreach my $f (keys %common) {
9388
        push @{$ra_compare_list}, [ $stripped_L{$f},  
9389
                                    $stripped_R{$f} ]; 
9390
    }
9391
    @{$ra_added   } = keys %added  ;
9392
    @{$ra_removed } = keys %deleted;
9393
 
9394
    print "<- align_by_pairs()\n" if $opt_v > 2;
9395
    return;
9396
#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
9397
#die;
9398
} # 1}}}
9399
sub html_header {                            # {{{1
9400
    my ($title , ) = @_;
9401
 
9402
    print "-> html_header\n" if $opt_v > 2;
9403
    return 
9404
'<html>
9405
<head>
9406
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
9407
<meta name="GENERATOR" content="cloc http://cloc.sourceforge.net">
9408
' .
9409
"
9410
<!-- Created by $script v$VERSION -->
9411
<title>$title</title>
9412
" .
9413
'
9414
<style TYPE="text/css">
9415
<!--
9416
    body {
9417
        color: black;
9418
        background-color: white;
9419
        font-family: monospace
9420
    }
9421
 
9422
    .whitespace {
9423
        background-color: gray;
9424
    }
9425
 
9426
    .comment {
9427
        color: gray;
9428
        font-style: italic;
9429
    }
9430
 
9431
    .clinenum {
9432
        color: red;
9433
    }
9434
 
9435
    .linenum {
9436
        color: green;
9437
    }
9438
 -->
9439
</style>
9440
</head>
9441
<body>
9442
<pre><tt>
9443
';
9444
    print "<- html_header\n" if $opt_v > 2;
9445
} # 1}}}
9446
sub html_end {                               # {{{1
9447
return 
9448
'</tt></pre>
9449
</body>
9450
</html>
9451
';
9452
} # 1}}}
9453
sub die_unknown_lang {                       # {{{1
9454
    my ($lang, $option_name) = @_;
9455
    die "Unknown language '$lang' used with $option_name option.  " .
9456
        "The command\n  $script --show-lang\n" .
9457
        "will print all recognized languages.  Language names are " .
9458
        "case sensitive.\n" ;
9459
} # 1}}}
9460
sub unicode_file {                           # {{{1
9461
    my $file = shift @_; 
9462
 
9463
    print "-> unicode_file($file)\n" if $opt_v > 2;
9464
    return 0 if (-s $file > 2_000_000);  
9465
    # don't bother trying to test binary files bigger than 2 MB
9466
 
9467
    my $IN = new IO::File $file, "r";
9468
    if (!defined $IN) {
9469
        warn "Unable to read $file; ignoring.\n";
9470
        return 0;
9471
    }
9472
    my @lines = <$IN>;
9473
    $IN->close;
9474
 
9475
    if (unicode_to_ascii( join('', @lines) )) {
9476
        print "<- unicode_file()\n" if $opt_v > 2;
9477
        return 1;
9478
    } else {
9479
        print "<- unicode_file()\n" if $opt_v > 2;
9480
        return 0;
9481
    }
9482
 
9483
} # 1}}}
9484
sub unicode_to_ascii {                       # {{{1
9485
    my $string = shift @_; 
9486
 
9487
    # A trivial attempt to convert UTF-16 little or big endian
9488
    # files into ASCII.  These files exhibit the following byte
9489
    # sequence:
9490
    #   byte   1:  255
9491
    #   byte   2:  254
9492
    #   byte   3:  ord of ASCII character
9493
    #   byte   4:    0
9494
    #   byte 3+i:  ord of ASCII character
9495
    #   byte 4+i:    0
9496
    # or
9497
    #   byte   1:  255
9498
    #   byte   2:  254
9499
    #   byte   3:    0
9500
    #   byte   4:  ord of ASCII character
9501
    #   byte 3+i:    0
9502
    #   byte 4+i:  ord of ASCII character
9503
 
9504
    my $length  = length $string;
9505
#print "length=$length\n";
9506
    return '' if $length <= 3;
9507
    my @unicode = split(//, $string);
9508
 
9509
    # check the first 100 characters for big or little endian UTF-16 encoding
9510
    my $max_peek = $length < 200 ? $length : 200;
9511
    my @view_1   = ();
9512
    for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
9513
    my @view_2   = ();
9514
    for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
9515
 
9516
    my $points_1 = 0;
9517
    foreach my $C (@view_1) {
9518
        ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
9519
                                                          or ord($C) == 10
9520
                                                          or ord($C) ==  9;
9521
    }
9522
 
9523
    my $points_2 = 0;
9524
    foreach my $C (@view_2) {
9525
        ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
9526
                                                          or ord($C) == 10
9527
                                                          or ord($C) ==  9;
9528
    }
9529
#print "points 1: $points_1\n";
9530
#print "points 2: $points_2\n";
9531
 
9532
    my $offset = undef;
9533
    if    ($points_1 > 90) { $offset = 2; }
9534
    elsif ($points_2 > 90) { $offset = 3; }
9535
    else                   { return '' }  # neither big or little endian UTF-16
9536
 
9537
    my @ascii              = ();
9538
    for (my $i = $offset; $i < $length; $i += 2) { push @ascii, $unicode[$i]; }
9539
    return join("", @ascii);
9540
} # 1}}}
9541
sub uncompress_archive_cmd {                 # {{{1
9542
    my ($archive_file, ) = @_;
9543
 
9544
    # Wrap $archive_file in single or double quotes in the system
9545
    # commands below to avoid filename chicanery (including
9546
    # spaces in the names).
9547
 
9548
    print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
9549
    my $extract_cmd = "";
9550
    my $missing     = "";
9551
    if ($opt_extract_with) {
9552
        ( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
9553
    } elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
9554
        $extract_cmd = "cat > -";
9555
    } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or 
9556
              $archive_file =~ /\.tgz$/       ) and !$ON_WINDOWS)    {
9557
        if (external_utility_exists("gzip --version")) {
9558
            if (external_utility_exists("tar --version")) {
9559
                $extract_cmd = "gzip -dc '$archive_file' | tar xf -";
9560
            } else {
9561
                $missing = "tar";
9562
            }
9563
        } else {
9564
            $missing = "gzip";
9565
        }
9566
    } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS)    {
9567
        if (external_utility_exists("bzip2 --help")) {
9568
            if (external_utility_exists("tar --version")) {
9569
                $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
9570
            } else {
9571
                $missing = "tar";
9572
            }
9573
        } else {
9574
            $missing = "bzip2";
9575
        }
9576
    } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS)    {
9577
        if (external_utility_exists("unxz --version")) {
9578
            if (external_utility_exists("tar --version")) {
9579
                $extract_cmd = "unxz -dc '$archive_file' | tar xf -";
9580
            } else {
9581
                $missing = "tar";
9582
            }
9583
        } else {
9584
            $missing = "bzip2";
9585
        }
9586
    } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS)    {
9587
        $extract_cmd = "tar xf '$archive_file'";
9588
    } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
9589
        if (external_utility_exists("cpio --version")) {
9590
            if (external_utility_exists("rpm2cpio")) {
9591
                $extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
9592
            } else {
9593
                $missing = "rpm2cpio";
9594
            }
9595
        } else {
9596
            $missing = "bzip2";
9597
        }
9598
    } elsif ($archive_file =~ /\.zip$/i and !$ON_WINDOWS)    {
9599
        if (external_utility_exists("unzip")) {
9600
            $extract_cmd = "unzip -qq -d . '$archive_file'";
9601
        } else {
9602
            $missing = "unzip";
9603
        }
9604
    } elsif ($ON_WINDOWS and $archive_file =~ /\.zip$/i) {
9605
        # zip on Windows, guess default Winzip install location
9606
        $extract_cmd = "";
9607
        my $WinZip = '"C:\\Program Files\\WinZip\\WinZip32.exe"';
9608
        if (external_utility_exists($WinZip)) {
9609
            $extract_cmd = "$WinZip -e -o \"$archive_file\" .";
9610
#print "trace 5 extract_cmd=[$extract_cmd]\n";
9611
        } else {
9612
#print "trace 6\n";
9613
            $missing = $WinZip;
9614
        }
9615
    }
9616
    print "<- uncompress_archive_cmd\n" if $opt_v > 2;
9617
    if ($missing) {
9618
        die "Unable to expand $archive_file because external\n",
9619
            "utility '$missing' is not available.\n",
9620
            "Another possibility is to use the --extract-with option.\n";
9621
    } else {
9622
        return $extract_cmd;
9623
    }
9624
}
9625
# 1}}}
9626
sub read_list_file {                         # {{{1
9627
    my ($file, ) = @_;
9628
 
9629
    print "-> read_list_file($file)\n" if $opt_v > 2;
9630
    my $IN = new IO::File $file, "r";
9631
    if (!defined $IN) {
9632
        warn "Unable to read $file; ignoring.\n";
9633
        next;
9634
    }
9635
    my @entry = ();
9636
    while (<$IN>) {
9637
        next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
9638
        s/\cM$//;  # DOS to Unix
9639
        chomp;
9640
        push @entry, $_;
9641
    }
9642
    $IN->close;
9643
 
9644
    print "<- read_list_file\n" if $opt_v > 2;
9645
    return @entry;
9646
}
9647
# 1}}}
9648
sub external_utility_exists {                # {{{1
9649
    my $exe = shift @_;
9650
 
9651
    my $success      = 0;
9652
    if ($ON_WINDOWS) {
9653
        $success = 1 unless system $exe . ' > nul';
9654
    } else {
9655
        $success = 1 unless system $exe . ' >/dev/null 2>&1';
9656
        if (!$success) {
9657
            $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
9658
        }
9659
    }
9660
 
9661
    return $success;
9662
} # 1}}}
9663
sub write_xsl_file {                         # {{{1
9664
    my $OUT = new IO::File $CLOC_XSL, "w";
9665
    if (!defined $OUT) {
9666
        warn "Unable to write $CLOC_XSL  $!\n";
9667
        return;
9668
    }
9669
    my $XSL =             # <style>  </style> {{{2
9670
'<?xml version="1.0" encoding="US-ASCII"?>
9671
<!-- XLS file by Paul Schwann, January 2009.
9672
     Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
9673
     -->
9674
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
9675
  <xsl:output method="html"/>
9676
  <xsl:template match="/">
9677
    <html xmlns="http://www.w3.org/1999/xhtml">
9678
      <head>
9679
        <title>CLOC Results</title>
9680
      </head>
9681
      <style type="text/css">
9682
        table {
9683
          table-layout: auto;
9684
          border-collapse: collapse;
9685
          empty-cells: show;
9686
        }
9687
        td, th {
9688
          padding: 4px;
9689
        }
9690
        th {
9691
          background-color: #CCCCCC;
9692
        }
9693
        td {
9694
          text-align: center;
9695
        }
9696
        table, td, tr, th {
9697
          border: thin solid #999999;
9698
        }
9699
      </style>
9700
      <body>
9701
        <h3><xsl:value-of select="results/header"/></h3>
9702
';
9703
# 2}}}
9704
 
9705
    if ($opt_by_file) {
9706
        $XSL .=             # <table> </table>{{{2
9707
'        <table>
9708
          <thead>
9709
            <tr>
9710
              <th>File</th>
9711
              <th>Blank</th>
9712
              <th>Comment</th>
9713
              <th>Code</th>
9714
              <th>Language</th>
9715
';
9716
        $XSL .=
9717
'             <th>3<sup>rd</sup> Generation Equivalent</th>
9718
              <th>Scale</th>
9719
' if $opt_3;
9720
        $XSL .=
9721
'           </tr>
9722
          </thead>
9723
          <tbody>
9724
          <xsl:for-each select="results/files/file">
9725
            <tr>
9726
              <th><xsl:value-of select="@name"/></th>
9727
              <td><xsl:value-of select="@blank"/></td>
9728
              <td><xsl:value-of select="@comment"/></td>
9729
              <td><xsl:value-of select="@code"/></td>
9730
              <td><xsl:value-of select="@language"/></td>
9731
';
9732
        $XSL .=
9733
'             <td><xsl:value-of select="@factor"/></td>
9734
              <td><xsl:value-of select="@scaled"/></td>
9735
' if $opt_3;
9736
        $XSL .=
9737
'           </tr>
9738
          </xsl:for-each>
9739
            <tr>
9740
              <th>Total</th>
9741
              <th><xsl:value-of select="results/files/total/@blank"/></th>
9742
              <th><xsl:value-of select="results/files/total/@comment"/></th>
9743
              <th><xsl:value-of select="results/files/total/@code"/></th>
9744
              <th><xsl:value-of select="results/files/total/@language"/></th>
9745
';
9746
        $XSL .=
9747
'             <th><xsl:value-of select="results/files/total/@factor"/></th>
9748
              <th><xsl:value-of select="results/files/total/@scaled"/></th>
9749
' if $opt_3;
9750
        $XSL .=
9751
'           </tr>
9752
          </tbody>
9753
        </table>
9754
        <br/>
9755
';
9756
# 2}}}
9757
    }
9758
 
9759
    if (!$opt_by_file or $opt_by_file_by_lang) {
9760
        $XSL .=             # <table> </table> {{{2
9761
'       <table>
9762
          <thead>
9763
            <tr>
9764
              <th>Language</th>
9765
              <th>Files</th>
9766
              <th>Blank</th>
9767
              <th>Comment</th>
9768
              <th>Code</th>
9769
';
9770
        $XSL .=
9771
'             <th>Scale</th>
9772
              <th>3<sup>rd</sup> Generation Equivalent</th>
9773
' if $opt_3;
9774
        $XSL .=
9775
'           </tr>
9776
          </thead>
9777
          <tbody>
9778
          <xsl:for-each select="results/languages/language">
9779
            <tr>
9780
              <th><xsl:value-of select="@name"/></th>
9781
              <td><xsl:value-of select="@files_count"/></td>
9782
              <td><xsl:value-of select="@blank"/></td>
9783
              <td><xsl:value-of select="@comment"/></td>
9784
              <td><xsl:value-of select="@code"/></td>
9785
';
9786
        $XSL .=
9787
'             <td><xsl:value-of select="@factor"/></td>
9788
              <td><xsl:value-of select="@scaled"/></td>
9789
' if $opt_3;
9790
        $XSL .=
9791
'          </tr>
9792
          </xsl:for-each>
9793
            <tr>
9794
              <th>Total</th>
9795
              <th><xsl:value-of select="results/languages/total/@sum_files"/></th>
9796
              <th><xsl:value-of select="results/languages/total/@blank"/></th>
9797
              <th><xsl:value-of select="results/languages/total/@comment"/></th>
9798
              <th><xsl:value-of select="results/languages/total/@code"/></th>
9799
';
9800
        $XSL .=
9801
'             <th><xsl:value-of select="results/languages/total/@factor"/></th>
9802
              <th><xsl:value-of select="results/languages/total/@scaled"/></th>
9803
' if $opt_3;
9804
        $XSL .=
9805
'           </tr>
9806
          </tbody>
9807
        </table>
9808
';
9809
# 2}}}
9810
    }
9811
 
9812
    $XSL.= <<'EO_XSL'; # {{{2
9813
      </body>
9814
    </html>
9815
  </xsl:template>
9816
</xsl:stylesheet>
9817
 
9818
EO_XSL
9819
# 2}}}
9820
 
9821
    my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
9822
<?xml version="1.0" encoding="US-ASCII"?>
9823
<!-- XLS file by Blazej Kroll, November 2010 -->
9824
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
9825
  <xsl:output method="html"/>
9826
  <xsl:template match="/">
9827
    <html xmlns="http://www.w3.org/1999/xhtml">
9828
      <head>
9829
        <title>CLOC Results</title>
9830
      </head>
9831
      <style type="text/css">
9832
        table {
9833
          table-layout: auto;
9834
          border-collapse: collapse;
9835
          empty-cells: show;
9836
          margin: 1em;
9837
        }
9838
        td, th {
9839
          padding: 4px;
9840
        }
9841
        th {
9842
          background-color: #CCCCCC;
9843
        }
9844
        td {
9845
          text-align: center;
9846
        }
9847
        table, td, tr, th {
9848
          border: thin solid #999999;
9849
        }
9850
      </style>
9851
      <body>
9852
        <h3><xsl:value-of select="results/header"/></h3>
9853
EO_DIFF_XSL
9854
# 2}}}
9855
 
9856
    if ($opt_by_file) {
9857
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
9858
        <table>
9859
          <thead>
9860
          <tr><th colspan="4">Same</th>
9861
          </tr>
9862
            <tr>
9863
              <th>File</th>
9864
              <th>Blank</th>
9865
              <th>Comment</th>
9866
              <th>Code</th>              
9867
            </tr>
9868
          </thead>
9869
          <tbody>
9870
          <xsl:for-each select="diff_results/same/file">
9871
            <tr>
9872
              <th><xsl:value-of select="@name"/></th>
9873
              <td><xsl:value-of select="@blank"/></td>
9874
              <td><xsl:value-of select="@comment"/></td>
9875
              <td><xsl:value-of select="@code"/></td>              
9876
            </tr>
9877
          </xsl:for-each>            
9878
          </tbody>
9879
        </table>
9880
 
9881
        <table>
9882
          <thead>
9883
          <tr><th colspan="4">Modified</th>
9884
          </tr>
9885
            <tr>
9886
              <th>File</th>
9887
              <th>Blank</th>
9888
              <th>Comment</th>
9889
              <th>Code</th>              
9890
            </tr>
9891
          </thead>
9892
          <tbody>
9893
          <xsl:for-each select="diff_results/modified/file">
9894
            <tr>
9895
              <th><xsl:value-of select="@name"/></th>
9896
              <td><xsl:value-of select="@blank"/></td>
9897
              <td><xsl:value-of select="@comment"/></td>
9898
              <td><xsl:value-of select="@code"/></td>              
9899
            </tr>
9900
          </xsl:for-each>            
9901
          </tbody>
9902
        </table>
9903
 
9904
        <table>
9905
          <thead>
9906
          <tr><th colspan="4">Added</th>
9907
          </tr>
9908
            <tr>
9909
              <th>File</th>
9910
              <th>Blank</th>
9911
              <th>Comment</th>
9912
              <th>Code</th>              
9913
            </tr>
9914
          </thead>
9915
          <tbody>
9916
          <xsl:for-each select="diff_results/added/file">
9917
            <tr>
9918
              <th><xsl:value-of select="@name"/></th>
9919
              <td><xsl:value-of select="@blank"/></td>
9920
              <td><xsl:value-of select="@comment"/></td>
9921
              <td><xsl:value-of select="@code"/></td>              
9922
            </tr>
9923
          </xsl:for-each>            
9924
          </tbody>
9925
        </table>
9926
 
9927
        <table>
9928
          <thead>
9929
          <tr><th colspan="4">Removed</th>
9930
          </tr>
9931
            <tr>
9932
              <th>File</th>
9933
              <th>Blank</th>
9934
              <th>Comment</th>
9935
              <th>Code</th>              
9936
            </tr>
9937
          </thead>
9938
          <tbody>
9939
          <xsl:for-each select="diff_results/removed/file">
9940
            <tr>
9941
              <th><xsl:value-of select="@name"/></th>
9942
              <td><xsl:value-of select="@blank"/></td>
9943
              <td><xsl:value-of select="@comment"/></td>
9944
              <td><xsl:value-of select="@code"/></td>              
9945
            </tr>
9946
          </xsl:for-each>            
9947
          </tbody>
9948
        </table>
9949
EO_DIFF_XSL
9950
# 2}}}
9951
    }
9952
 
9953
    if (!$opt_by_file or $opt_by_file_by_lang) {
9954
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
9955
        <table>
9956
          <thead>
9957
          <tr><th colspan="5">Same</th>
9958
          </tr>
9959
            <tr>
9960
              <th>Language</th>
9961
              <th>Files</th>
9962
              <th>Blank</th>
9963
              <th>Comment</th>
9964
              <th>Code</th>              
9965
            </tr>
9966
          </thead>
9967
          <tbody>
9968
          <xsl:for-each select="diff_results/same/language">
9969
            <tr>
9970
              <th><xsl:value-of select="@name"/></th>
9971
              <td><xsl:value-of select="@files_count"/></td>
9972
              <td><xsl:value-of select="@blank"/></td>
9973
              <td><xsl:value-of select="@comment"/></td>
9974
              <td><xsl:value-of select="@code"/></td>              
9975
            </tr>
9976
          </xsl:for-each>            
9977
          </tbody>
9978
        </table>
9979
 
9980
        <table>
9981
          <thead>
9982
          <tr><th colspan="5">Modified</th>
9983
          </tr>
9984
            <tr>
9985
              <th>Language</th>
9986
              <th>Files</th>
9987
              <th>Blank</th>
9988
              <th>Comment</th>
9989
              <th>Code</th>              
9990
            </tr>
9991
          </thead>
9992
          <tbody>
9993
          <xsl:for-each select="diff_results/modified/language">
9994
            <tr>
9995
              <th><xsl:value-of select="@name"/></th>
9996
              <td><xsl:value-of select="@files_count"/></td>
9997
              <td><xsl:value-of select="@blank"/></td>
9998
              <td><xsl:value-of select="@comment"/></td>
9999
              <td><xsl:value-of select="@code"/></td>              
10000
            </tr>
10001
          </xsl:for-each>            
10002
          </tbody>
10003
        </table>
10004
 
10005
        <table>
10006
          <thead>
10007
          <tr><th colspan="5">Added</th>
10008
          </tr>
10009
            <tr>
10010
              <th>Language</th>
10011
              <th>Files</th>
10012
              <th>Blank</th>
10013
              <th>Comment</th>
10014
              <th>Code</th>              
10015
            </tr>
10016
          </thead>
10017
          <tbody>
10018
          <xsl:for-each select="diff_results/added/language">
10019
            <tr>
10020
              <th><xsl:value-of select="@name"/></th>
10021
              <td><xsl:value-of select="@files_count"/></td>
10022
              <td><xsl:value-of select="@blank"/></td>
10023
              <td><xsl:value-of select="@comment"/></td>
10024
              <td><xsl:value-of select="@code"/></td>              
10025
            </tr>
10026
          </xsl:for-each>            
10027
          </tbody>
10028
        </table>
10029
 
10030
        <table>
10031
          <thead>
10032
          <tr><th colspan="5">Removed</th>
10033
          </tr>
10034
            <tr>
10035
              <th>Language</th>
10036
              <th>Files</th>
10037
              <th>Blank</th>
10038
              <th>Comment</th>
10039
              <th>Code</th>              
10040
            </tr>
10041
          </thead>
10042
          <tbody>
10043
          <xsl:for-each select="diff_results/removed/language">
10044
            <tr>
10045
              <th><xsl:value-of select="@name"/></th>
10046
              <td><xsl:value-of select="@files_count"/></td>
10047
              <td><xsl:value-of select="@blank"/></td>
10048
              <td><xsl:value-of select="@comment"/></td>
10049
              <td><xsl:value-of select="@code"/></td>              
10050
            </tr>
10051
          </xsl:for-each>            
10052
          </tbody>
10053
        </table>
10054
EO_DIFF_XSL
10055
# 2}}}
10056
 
10057
    }
10058
 
10059
    $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
10060
      </body>
10061
    </html>
10062
  </xsl:template>
10063
</xsl:stylesheet>
10064
EO_DIFF_XSL
10065
# 2}}}
10066
    if ($opt_diff) {
10067
        print $OUT $XSL_DIFF;
10068
    } else {
10069
        print $OUT $XSL;
10070
    }
10071
    $OUT->close();
10072
} # 1}}}
10073
sub normalize_file_names {                   # {{{1 
10074
    my (@files, ) = @_;
10075
 
10076
    # Returns a hash of file names reduced to a canonical form
10077
    # (fully qualified file names, all path separators changed to /,
10078
    # Windows file names lowercased).  Hash values are the original
10079
    # file name.
10080
 
10081
    my %normalized = ();
10082
    foreach my $F (@files) {
10083
        my $F_norm = $F;
10084
        if ($ON_WINDOWS) {
10085
            $F_norm = lc $F_norm; # for case insensitive file name comparisons
10086
            $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
10087
            $F_norm =~ s{^\./}{}g;  # remove leading ./
10088
            if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
10089
                # looks like a relative path; prefix with cwd
10090
                $F_norm = lc "$cwd/$F_norm";
10091
            }
10092
        } else {
10093
            $F_norm =~ s{^\./}{}g;  # remove leading ./
10094
            if ($F_norm !~ m{^/}) {
10095
                # looks like a relative path; prefix with cwd
10096
                $F_norm = lc "$cwd/$F_norm";
10097
            }
10098
        }
10099
        $normalized{ $F_norm } = $F;
10100
    }
10101
    return %normalized;
10102
} # 1}}}
10103
sub combine_diffs {                          # {{{1
10104
    # subroutine by Andy (awalshe@sf.net)
10105
    # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
10106
    my ($ra_files) = @_;
10107
 
10108
    my $res   = "$URL v $VERSION\n";
10109
    my $dl    = '-';
10110
    my $width = 79;
10111
    # columns are in this order
10112
    my @cols  = ('files', 'blank', 'comment', 'code');
10113
    my %HoH   = ();
10114
 
10115
    foreach my $file (@{$ra_files}) {
10116
        my $IN = new IO::File $file, "r";
10117
        if (!defined $IN) {
10118
            warn "Unable to read $file; ignoring.\n";
10119
            next;
10120
        }
10121
 
10122
        my $sec;
10123
        while (<$IN>) {
10124
            chomp;
10125
            s/\cM$//;
10126
            next if /^(http|Language|-----)/;
10127
            if (/^[A-Za-z0-9]+/) {        # section title
10128
                $sec = $_;
10129
                chomp($sec);
10130
                $HoH{$sec} = () if ! exists $HoH{$sec};
10131
                next;
10132
            }
10133
 
10134
            if (/^\s(same|modified|added|removed)/) {  # calculated totals row
10135
                my @ar = grep { $_ ne '' } split(/ /, $_);
10136
                chomp(@ar);
10137
                my $ttl = shift @ar;
10138
                my $i = 0;
10139
                foreach(@ar) {
10140
                    my $t = "${ttl}${dl}${cols[$i]}";
10141
                    $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
10142
                    $HoH{$sec}{$t} += $_;
10143
                    $i++;
10144
                }
10145
            }
10146
        }
10147
        $IN->close;
10148
    }
10149
 
10150
    # rows are in this order
10151
    my @rows = ('same', 'modified', 'added', 'removed');
10152
 
10153
    $res .= sprintf("%s\n", "-" x $width);
10154
    $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', 
10155
                    $cols[0], $cols[1], $cols[2], $cols[3]);
10156
    $res .= sprintf("%s\n", "-" x $width);
10157
 
10158
    for my $sec ( keys %HoH ) {
10159
        next if $sec =~ /SUM:/;
10160
        $res .= "$sec\n";
10161
        foreach (@rows) {
10162
            $res .= sprintf(" %-18s %14s %14s %14s %14s\n", 
10163
                            $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
10164
                                $HoH{$sec}{"${_}${dl}${cols[1]}"},
10165
                                $HoH{$sec}{"${_}${dl}${cols[2]}"},
10166
                                $HoH{$sec}{"${_}${dl}${cols[3]}"});
10167
        }
10168
    }
10169
    $res .= sprintf("%s\n", "-" x $width);
10170
    my $sec = 'SUM:';
10171
    $res .= "$sec\n";
10172
    foreach (@rows) {
10173
        $res .= sprintf(" %-18s %14s %14s %14s %14s\n", 
10174
                        $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
10175
                            $HoH{$sec}{"${_}${dl}${cols[1]}"},
10176
                            $HoH{$sec}{"${_}${dl}${cols[2]}"},
10177
                            $HoH{$sec}{"${_}${dl}${cols[3]}"});
10178
    }
10179
    $res .= sprintf("%s\n", "-" x $width);
10180
 
10181
    return $res;
10182
} # 1}}}
10183
sub get_time {                               # {{{1
10184
    if ($HAVE_Time_HiRes) {
10185
        return Time::HiRes::time();
10186
    } else {
10187
        return time();
10188
    }
10189
} # 1}}}
10190
sub really_is_D {                            # {{{1
10191
    # Ref bug 131, files ending with .d could be init.d scripts
10192
    # instead of D language source files.
10193
    my ($file        , # in
10194
        $rh_Err      , # in   hash of error codes
10195
        $raa_errors  , # out
10196
       ) = @_;
10197
    print "-> really_is_D($file)\n" if $opt_v > 2;
10198
    my $possible_script = peek_at_first_line($file, $rh_Err, $raa_errors);
10199
 
10200
    print "<- really_is_D($file)\n" if $opt_v > 2;
10201
    return $possible_script;    # null string if D, otherwise a language
10202
} # 1}}}
10203
# subroutines copied from SLOCCount
10204
my %lex_files    = ();  # really_is_lex()
10205
my %expect_files = ();  # really_is_expect()
10206
my %php_files    = ();  # really_is_php()
10207
sub really_is_lex {                          # {{{1
10208
# Given filename, returns TRUE if its contents really is lex.
10209
# lex file must have "%%", "%{", and "%}".
10210
# In theory, a lex file doesn't need "%{" and "%}", but in practice
10211
# they all have them, and requiring them avoid mislabeling a
10212
# non-lexfile as a lex file.
10213
 
10214
 my $filename = shift;
10215
 chomp($filename);
10216
 
10217
 my $is_lex = 0;      # Value to determine.
10218
 my $percent_percent = 0;
10219
 my $percent_opencurly = 0;
10220
 my $percent_closecurly = 0;
10221
 
10222
 # Return cached result, if available:
10223
 if ($lex_files{$filename}) { return $lex_files{$filename};}
10224
 
10225
 open(LEX_FILE, "<$filename") ||
10226
      die "Can't open $filename to determine if it's lex.\n";
10227
 while(<LEX_FILE>) {
10228
   $percent_percent++     if (m/^\s*\%\%/);
10229
   $percent_opencurly++   if (m/^\s*\%\{/);
10230
   $percent_closecurly++   if (m/^\s*\%\}/);
10231
 }
10232
 close(LEX_FILE);
10233
 
10234
 if ($percent_percent && $percent_opencurly && $percent_closecurly)
10235
          {$is_lex = 1;}
10236
 
10237
 $lex_files{$filename} = $is_lex; # Store result in cache.
10238
 
10239
 return $is_lex;
10240
} # 1}}}
10241
sub really_is_expect {                       # {{{1
10242
# Given filename, returns TRUE if its contents really are Expect.
10243
# Many "exp" files (such as in Apache and Mesa) are just "export" data,
10244
# summarizing something else # (e.g., its interface).
10245
# Sometimes (like in RPM) it's just misc. data.
10246
# Thus, we need to look at the file to determine
10247
# if it's really an "expect" file.
10248
 
10249
 my $filename = shift;
10250
 chomp($filename);
10251
 
10252
# The heuristic is as follows: it's Expect _IF_ it:
10253
# 1. has "load_lib" command and either "#" comments or {}.
10254
# 2. {, }, and one of: proc, if, [...], expect
10255
 
10256
 my $is_expect = 0;      # Value to determine.
10257
 
10258
 my $begin_brace = 0;  # Lines that begin with curly braces.
10259
 my $end_brace = 0;    # Lines that begin with curly braces.
10260
 my $load_lib = 0;     # Lines with the Load_lib command.
10261
 my $found_proc = 0;
10262
 my $found_if = 0;
10263
 my $found_brackets = 0;
10264
 my $found_expect = 0;
10265
 my $found_pound = 0;
10266
 
10267
 # Return cached result, if available:
10268
 if ($expect_files{$filename}) { return expect_files{$filename};}
10269
 
10270
 open(EXPECT_FILE, "<$filename") ||
10271
      die "Can't open $filename to determine if it's expect.\n";
10272
 while(<EXPECT_FILE>) {
10273
 
10274
   if (m/#/) {$found_pound++; s/#.*//;}
10275
   if (m/^\s*\{/) { $begin_brace++;}
10276
   if (m/\{\s*$/) { $begin_brace++;}
10277
   if (m/^\s*\}/) { $end_brace++;}
10278
   if (m/\};?\s*$/) { $end_brace++;}
10279
   if (m/^\s*load_lib\s+\S/) { $load_lib++;}
10280
   if (m/^\s*proc\s/) { $found_proc++;}
10281
   if (m/^\s*if\s/) { $found_if++;}
10282
   if (m/\[.*\]/) { $found_brackets++;}
10283
   if (m/^\s*expect\s/) { $found_expect++;}
10284
 }
10285
 close(EXPECT_FILE);
10286
 
10287
 if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
10288
          {$is_expect = 1;}
10289
 if ( $begin_brace && $end_brace &&
10290
      ($found_proc || $found_if || $found_brackets || $found_expect))
10291
          {$is_expect = 1;}
10292
 
10293
 $expect_files{$filename} = $is_expect; # Store result in cache.
10294
 
10295
 return $is_expect;
10296
} # 1}}}
10297
sub really_is_pascal {                       # {{{1
10298
# Given filename, returns TRUE if its contents really are Pascal.
10299
 
10300
# This isn't as obvious as it seems.
10301
# Many ".p" files are Perl files
10302
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
10303
# others are C extractions
10304
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
10305
# and some files in linuxconf).
10306
# However, test files in "p2c" really are Pascal, for example.
10307
 
10308
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
10309
# is actually C code.  The heuristics determine that they're not Pascal,
10310
# but because it ends in ".p" it's not counted as C code either.
10311
# I believe this is actually correct behavior, because frankly it
10312
# looks like it's automatically generated (it's a bitmap expressed as code).
10313
# Rather than guess otherwise, we don't include it in a list of
10314
# source files.  Let's face it, someone who creates C files ending in ".p"
10315
# and expects them to be counted by default as C files in SLOCCount needs
10316
# their head examined.  I suggest examining their head
10317
# with a sucker rod (see syslogd(8) for more on sucker rods).
10318
 
10319
# This heuristic counts as Pascal such files such as:
10320
#  /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
10321
# Which is hand-generated.  We don't count woven documents now anyway,
10322
# so this is justifiable.
10323
 
10324
 my $filename = shift;
10325
 chomp($filename);
10326
 
10327
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
10328
# (ignoring {...} and (*...*) comments):
10329
# 1. "^..program NAME" or "^..unit NAME",
10330
# 2. "procedure", "function", "^..interface", or "^..implementation",
10331
# 3. a "begin", and
10332
# 4. it ends with "end.",
10333
#
10334
# Or it has all of the following:
10335
# 1. "^..module NAME" and
10336
# 2. it ends with "end.".
10337
#
10338
# Or it has all of the following:
10339
# 1. "^..program NAME",
10340
# 2. a "begin", and
10341
# 3. it ends with "end.".
10342
#
10343
# The "end." requirements in particular filter out non-Pascal.
10344
#
10345
# Note (jgb): this does not detect Pascal main files in fpc, like
10346
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
10347
# it
10348
 
10349
 my $is_pascal = 0;      # Value to determine.
10350
 
10351
 my $has_program = 0;
10352
 my $has_unit = 0;
10353
 my $has_module = 0;
10354
 my $has_procedure_or_function = 0;
10355
 my $found_begin = 0;
10356
 my $found_terminating_end = 0;
10357
 my $has_begin = 0;
10358
 
10359
 open(PASCAL_FILE, "<$filename") ||
10360
      die "Can't open $filename to determine if it's pascal.\n";
10361
 while(<PASCAL_FILE>) {
10362
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
10363
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
10364
   if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
10365
   if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
10366
   if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
10367
   if (m/\bprocedure\b/i)         { $has_procedure_or_function = 1; }
10368
   if (m/\bfunction\b/i)          { $has_procedure_or_function = 1; }
10369
   if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
10370
   if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
10371
   if (m/\bbegin\b/i) { $has_begin = 1; }
10372
   # Originally I said:
10373
   # "This heuristic fails if there are multi-line comments after
10374
   # "end."; I haven't seen that in real Pascal programs:"
10375
   # But jgb found there are a good quantity of them in Debian, specially in 
10376
   # fpc (at the end of a lot of files there is a multiline comment
10377
   # with the changelog for the file).
10378
   # Therefore, assume Pascal if "end." appears anywhere in the file.
10379
   if (m/end\.\s*$/i) {$found_terminating_end = 1;}
10380
#   elsif (m/\S/) {$found_terminating_end = 0;}
10381
 }
10382
 close(PASCAL_FILE);
10383
 
10384
 # Okay, we've examined the entire file looking for clues;
10385
 # let's use those clues to determine if it's really Pascal:
10386
 
10387
 if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
10388
     $has_begin && $found_terminating_end ) ||
10389
      ( $has_module && $found_terminating_end ) ||
10390
      ( $has_program && $has_begin && $found_terminating_end ) )
10391
          {$is_pascal = 1;}
10392
 
10393
 return $is_pascal;
10394
} # 1}}}
10395
sub really_is_incpascal {                    # {{{1
10396
# Given filename, returns TRUE if its contents really are Pascal.
10397
# For .inc files (mainly seen in fpc)
10398
 
10399
 my $filename = shift;
10400
 chomp($filename);
10401
 
10402
# The heuristic is as follows: it is Pacal if any of the following:
10403
# 1. really_is_pascal returns true
10404
# 2. Any usual reserverd word is found (program, unit, const, begin...)
10405
 
10406
 # If the general routine for Pascal files works, we have it
10407
 if (really_is_pascal($filename)) { 
10408
   return 1;
10409
 }
10410
 
10411
 my $is_pascal = 0;      # Value to determine.
10412
 my $found_begin = 0;
10413
 
10414
 open(PASCAL_FILE, "<$filename") ||
10415
      die "Can't open $filename to determine if it's pascal.\n";
10416
 while(<PASCAL_FILE>) {
10417
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
10418
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
10419
   if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
10420
   if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
10421
   if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
10422
   if (m/\bprocedure\b/i)         {$is_pascal = 1; }
10423
   if (m/\bfunction\b/i)          {$is_pascal = 1; }
10424
   if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
10425
   if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
10426
   if (m/\bconstant\s+/i)         {$is_pascal=1;}
10427
   if (m/\bbegin\b/i) { $found_begin = 1; }
10428
   if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
10429
   if ($is_pascal) {
10430
     last;
10431
   }
10432
 }
10433
 
10434
 close(PASCAL_FILE);
10435
 return $is_pascal;
10436
} # 1}}}
10437
sub really_is_php {                          # {{{1
10438
# Given filename, returns TRUE if its contents really is php.
10439
 
10440
 my $filename = shift;
10441
 chomp($filename);
10442
 
10443
 my $is_php = 0;      # Value to determine.
10444
 # Need to find a matching pair of surrounds, with ending after beginning:
10445
 my $normal_surround = 0;  # <?; bit 0 = <?, bit 1 = ?>
10446
 my $script_surround = 0;  # <script..>; bit 0 = <script language="php">
10447
 my $asp_surround = 0;     # <%; bit 0 = <%, bit 1 = %>
10448
 
10449
 # Return cached result, if available:
10450
 if ($php_files{$filename}) { return $php_files{$filename};}
10451
 
10452
 open(PHP_FILE, "<$filename") ||
10453
      die "Can't open $filename to determine if it's php.\n";
10454
 while(<PHP_FILE>) {
10455
   if (m/\<\?/)                           { $normal_surround |= 1; }
10456
   if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
10457
   if (m/\<script.*language="?php"?/i)    { $script_surround |= 1; }
10458
   if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
10459
   if (m/\<\%/)                           { $asp_surround |= 1; }
10460
   if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
10461
 }
10462
 close(PHP_FILE);
10463
 
10464
 if ( ($normal_surround == 3) || ($script_surround == 3) ||
10465
      ($asp_surround == 3)) {
10466
   $is_php = 1;
10467
 }
10468
 
10469
 $php_files{$filename} = $is_php; # Store result in cache.
10470
 
10471
 return $is_php;
10472
} # 1}}}
10473
__END__
10474
mode values (stat $item)[2]
10475
       Unix    Windows
10476
file:  33188   33206
10477
dir :  16832   16895
10478
link:  33261   33206
10479
pipe:   4544    null